|
|
1.1 ! root 1: /* ! 2: * Copyright (C) 2009-2010 Nippon Telegraph and Telephone Corporation. ! 3: * ! 4: * This program is free software; you can redistribute it and/or ! 5: * modify it under the terms of the GNU General Public License version ! 6: * 2 as published by the Free Software Foundation. ! 7: * ! 8: * You should have received a copy of the GNU General Public License ! 9: * along with this program. If not, see <http://www.gnu.org/licenses/>. ! 10: */ ! 11: #ifdef _WIN32 ! 12: #include <windows.h> ! 13: #include <winsock2.h> ! 14: #include <ws2tcpip.h> ! 15: #else ! 16: #include <netdb.h> ! 17: #include <netinet/tcp.h> ! 18: ! 19: #define closesocket(s) close(s) ! 20: #endif ! 21: ! 22: #include "qemu-common.h" ! 23: #include "qemu-error.h" ! 24: #include "qemu_socket.h" ! 25: #include "block_int.h" ! 26: ! 27: #define SD_PROTO_VER 0x01 ! 28: ! 29: #define SD_DEFAULT_ADDR "localhost" ! 30: #define SD_DEFAULT_PORT "7000" ! 31: ! 32: #define SD_OP_CREATE_AND_WRITE_OBJ 0x01 ! 33: #define SD_OP_READ_OBJ 0x02 ! 34: #define SD_OP_WRITE_OBJ 0x03 ! 35: ! 36: #define SD_OP_NEW_VDI 0x11 ! 37: #define SD_OP_LOCK_VDI 0x12 ! 38: #define SD_OP_RELEASE_VDI 0x13 ! 39: #define SD_OP_GET_VDI_INFO 0x14 ! 40: #define SD_OP_READ_VDIS 0x15 ! 41: ! 42: #define SD_FLAG_CMD_WRITE 0x01 ! 43: #define SD_FLAG_CMD_COW 0x02 ! 44: ! 45: #define SD_RES_SUCCESS 0x00 /* Success */ ! 46: #define SD_RES_UNKNOWN 0x01 /* Unknown error */ ! 47: #define SD_RES_NO_OBJ 0x02 /* No object found */ ! 48: #define SD_RES_EIO 0x03 /* I/O error */ ! 49: #define SD_RES_VDI_EXIST 0x04 /* Vdi exists already */ ! 50: #define SD_RES_INVALID_PARMS 0x05 /* Invalid parameters */ ! 51: #define SD_RES_SYSTEM_ERROR 0x06 /* System error */ ! 52: #define SD_RES_VDI_LOCKED 0x07 /* Vdi is locked */ ! 53: #define SD_RES_NO_VDI 0x08 /* No vdi found */ ! 54: #define SD_RES_NO_BASE_VDI 0x09 /* No base vdi found */ ! 55: #define SD_RES_VDI_READ 0x0A /* Cannot read requested vdi */ ! 56: #define SD_RES_VDI_WRITE 0x0B /* Cannot write requested vdi */ ! 57: #define SD_RES_BASE_VDI_READ 0x0C /* Cannot read base vdi */ ! 58: #define SD_RES_BASE_VDI_WRITE 0x0D /* Cannot write base vdi */ ! 59: #define SD_RES_NO_TAG 0x0E /* Requested tag is not found */ ! 60: #define SD_RES_STARTUP 0x0F /* Sheepdog is on starting up */ ! 61: #define SD_RES_VDI_NOT_LOCKED 0x10 /* Vdi is not locked */ ! 62: #define SD_RES_SHUTDOWN 0x11 /* Sheepdog is shutting down */ ! 63: #define SD_RES_NO_MEM 0x12 /* Cannot allocate memory */ ! 64: #define SD_RES_FULL_VDI 0x13 /* we already have the maximum vdis */ ! 65: #define SD_RES_VER_MISMATCH 0x14 /* Protocol version mismatch */ ! 66: #define SD_RES_NO_SPACE 0x15 /* Server has no room for new objects */ ! 67: #define SD_RES_WAIT_FOR_FORMAT 0x16 /* Waiting for a format operation */ ! 68: #define SD_RES_WAIT_FOR_JOIN 0x17 /* Waiting for other nodes joining */ ! 69: #define SD_RES_JOIN_FAILED 0x18 /* Target node had failed to join sheepdog */ ! 70: ! 71: /* ! 72: * Object ID rules ! 73: * ! 74: * 0 - 19 (20 bits): data object space ! 75: * 20 - 31 (12 bits): reserved data object space ! 76: * 32 - 55 (24 bits): vdi object space ! 77: * 56 - 59 ( 4 bits): reserved vdi object space ! 78: * 60 - 63 ( 4 bits): object type indentifier space ! 79: */ ! 80: ! 81: #define VDI_SPACE_SHIFT 32 ! 82: #define VDI_BIT (UINT64_C(1) << 63) ! 83: #define VMSTATE_BIT (UINT64_C(1) << 62) ! 84: #define MAX_DATA_OBJS (UINT64_C(1) << 20) ! 85: #define MAX_CHILDREN 1024 ! 86: #define SD_MAX_VDI_LEN 256 ! 87: #define SD_MAX_VDI_TAG_LEN 256 ! 88: #define SD_NR_VDIS (1U << 24) ! 89: #define SD_DATA_OBJ_SIZE (UINT64_C(1) << 22) ! 90: #define SD_MAX_VDI_SIZE (SD_DATA_OBJ_SIZE * MAX_DATA_OBJS) ! 91: #define SECTOR_SIZE 512 ! 92: ! 93: #define SD_INODE_SIZE (sizeof(SheepdogInode)) ! 94: #define CURRENT_VDI_ID 0 ! 95: ! 96: typedef struct SheepdogReq { ! 97: uint8_t proto_ver; ! 98: uint8_t opcode; ! 99: uint16_t flags; ! 100: uint32_t epoch; ! 101: uint32_t id; ! 102: uint32_t data_length; ! 103: uint32_t opcode_specific[8]; ! 104: } SheepdogReq; ! 105: ! 106: typedef struct SheepdogRsp { ! 107: uint8_t proto_ver; ! 108: uint8_t opcode; ! 109: uint16_t flags; ! 110: uint32_t epoch; ! 111: uint32_t id; ! 112: uint32_t data_length; ! 113: uint32_t result; ! 114: uint32_t opcode_specific[7]; ! 115: } SheepdogRsp; ! 116: ! 117: typedef struct SheepdogObjReq { ! 118: uint8_t proto_ver; ! 119: uint8_t opcode; ! 120: uint16_t flags; ! 121: uint32_t epoch; ! 122: uint32_t id; ! 123: uint32_t data_length; ! 124: uint64_t oid; ! 125: uint64_t cow_oid; ! 126: uint32_t copies; ! 127: uint32_t rsvd; ! 128: uint64_t offset; ! 129: } SheepdogObjReq; ! 130: ! 131: typedef struct SheepdogObjRsp { ! 132: uint8_t proto_ver; ! 133: uint8_t opcode; ! 134: uint16_t flags; ! 135: uint32_t epoch; ! 136: uint32_t id; ! 137: uint32_t data_length; ! 138: uint32_t result; ! 139: uint32_t copies; ! 140: uint32_t pad[6]; ! 141: } SheepdogObjRsp; ! 142: ! 143: typedef struct SheepdogVdiReq { ! 144: uint8_t proto_ver; ! 145: uint8_t opcode; ! 146: uint16_t flags; ! 147: uint32_t epoch; ! 148: uint32_t id; ! 149: uint32_t data_length; ! 150: uint64_t vdi_size; ! 151: uint32_t base_vdi_id; ! 152: uint32_t copies; ! 153: uint32_t snapid; ! 154: uint32_t pad[3]; ! 155: } SheepdogVdiReq; ! 156: ! 157: typedef struct SheepdogVdiRsp { ! 158: uint8_t proto_ver; ! 159: uint8_t opcode; ! 160: uint16_t flags; ! 161: uint32_t epoch; ! 162: uint32_t id; ! 163: uint32_t data_length; ! 164: uint32_t result; ! 165: uint32_t rsvd; ! 166: uint32_t vdi_id; ! 167: uint32_t pad[5]; ! 168: } SheepdogVdiRsp; ! 169: ! 170: typedef struct SheepdogInode { ! 171: char name[SD_MAX_VDI_LEN]; ! 172: char tag[SD_MAX_VDI_TAG_LEN]; ! 173: uint64_t ctime; ! 174: uint64_t snap_ctime; ! 175: uint64_t vm_clock_nsec; ! 176: uint64_t vdi_size; ! 177: uint64_t vm_state_size; ! 178: uint16_t copy_policy; ! 179: uint8_t nr_copies; ! 180: uint8_t block_size_shift; ! 181: uint32_t snap_id; ! 182: uint32_t vdi_id; ! 183: uint32_t parent_vdi_id; ! 184: uint32_t child_vdi_id[MAX_CHILDREN]; ! 185: uint32_t data_vdi_id[MAX_DATA_OBJS]; ! 186: } SheepdogInode; ! 187: ! 188: /* ! 189: * 64 bit FNV-1a non-zero initial basis ! 190: */ ! 191: #define FNV1A_64_INIT ((uint64_t)0xcbf29ce484222325ULL) ! 192: ! 193: /* ! 194: * 64 bit Fowler/Noll/Vo FNV-1a hash code ! 195: */ ! 196: static inline uint64_t fnv_64a_buf(void *buf, size_t len, uint64_t hval) ! 197: { ! 198: unsigned char *bp = buf; ! 199: unsigned char *be = bp + len; ! 200: while (bp < be) { ! 201: hval ^= (uint64_t) *bp++; ! 202: hval += (hval << 1) + (hval << 4) + (hval << 5) + ! 203: (hval << 7) + (hval << 8) + (hval << 40); ! 204: } ! 205: return hval; ! 206: } ! 207: ! 208: static inline int is_data_obj_writeable(SheepdogInode *inode, unsigned int idx) ! 209: { ! 210: return inode->vdi_id == inode->data_vdi_id[idx]; ! 211: } ! 212: ! 213: static inline int is_data_obj(uint64_t oid) ! 214: { ! 215: return !(VDI_BIT & oid); ! 216: } ! 217: ! 218: static inline uint64_t data_oid_to_idx(uint64_t oid) ! 219: { ! 220: return oid & (MAX_DATA_OBJS - 1); ! 221: } ! 222: ! 223: static inline uint64_t vid_to_vdi_oid(uint32_t vid) ! 224: { ! 225: return VDI_BIT | ((uint64_t)vid << VDI_SPACE_SHIFT); ! 226: } ! 227: ! 228: static inline uint64_t vid_to_vmstate_oid(uint32_t vid, uint32_t idx) ! 229: { ! 230: return VMSTATE_BIT | ((uint64_t)vid << VDI_SPACE_SHIFT) | idx; ! 231: } ! 232: ! 233: static inline uint64_t vid_to_data_oid(uint32_t vid, uint32_t idx) ! 234: { ! 235: return ((uint64_t)vid << VDI_SPACE_SHIFT) | idx; ! 236: } ! 237: ! 238: static inline int is_snapshot(struct SheepdogInode *inode) ! 239: { ! 240: return !!inode->snap_ctime; ! 241: } ! 242: ! 243: #undef dprintf ! 244: #ifdef DEBUG_SDOG ! 245: #define dprintf(fmt, args...) \ ! 246: do { \ ! 247: fprintf(stdout, "%s %d: " fmt, __func__, __LINE__, ##args); \ ! 248: } while (0) ! 249: #else ! 250: #define dprintf(fmt, args...) ! 251: #endif ! 252: ! 253: typedef struct SheepdogAIOCB SheepdogAIOCB; ! 254: ! 255: typedef struct AIOReq { ! 256: SheepdogAIOCB *aiocb; ! 257: unsigned int iov_offset; ! 258: ! 259: uint64_t oid; ! 260: uint64_t base_oid; ! 261: uint64_t offset; ! 262: unsigned int data_len; ! 263: uint8_t flags; ! 264: uint32_t id; ! 265: ! 266: QLIST_ENTRY(AIOReq) outstanding_aio_siblings; ! 267: QLIST_ENTRY(AIOReq) aioreq_siblings; ! 268: } AIOReq; ! 269: ! 270: enum AIOCBState { ! 271: AIOCB_WRITE_UDATA, ! 272: AIOCB_READ_UDATA, ! 273: }; ! 274: ! 275: struct SheepdogAIOCB { ! 276: BlockDriverAIOCB common; ! 277: ! 278: QEMUIOVector *qiov; ! 279: ! 280: int64_t sector_num; ! 281: int nb_sectors; ! 282: ! 283: int ret; ! 284: enum AIOCBState aiocb_type; ! 285: ! 286: QEMUBH *bh; ! 287: void (*aio_done_func)(SheepdogAIOCB *); ! 288: ! 289: int canceled; ! 290: ! 291: QLIST_HEAD(aioreq_head, AIOReq) aioreq_head; ! 292: }; ! 293: ! 294: typedef struct BDRVSheepdogState { ! 295: SheepdogInode inode; ! 296: ! 297: uint32_t min_dirty_data_idx; ! 298: uint32_t max_dirty_data_idx; ! 299: ! 300: char name[SD_MAX_VDI_LEN]; ! 301: int is_snapshot; ! 302: ! 303: char *addr; ! 304: char *port; ! 305: int fd; ! 306: ! 307: uint32_t aioreq_seq_num; ! 308: QLIST_HEAD(outstanding_aio_head, AIOReq) outstanding_aio_head; ! 309: } BDRVSheepdogState; ! 310: ! 311: static const char * sd_strerror(int err) ! 312: { ! 313: int i; ! 314: ! 315: static const struct { ! 316: int err; ! 317: const char *desc; ! 318: } errors[] = { ! 319: {SD_RES_SUCCESS, "Success"}, ! 320: {SD_RES_UNKNOWN, "Unknown error"}, ! 321: {SD_RES_NO_OBJ, "No object found"}, ! 322: {SD_RES_EIO, "I/O error"}, ! 323: {SD_RES_VDI_EXIST, "VDI exists already"}, ! 324: {SD_RES_INVALID_PARMS, "Invalid parameters"}, ! 325: {SD_RES_SYSTEM_ERROR, "System error"}, ! 326: {SD_RES_VDI_LOCKED, "VDI is already locked"}, ! 327: {SD_RES_NO_VDI, "No vdi found"}, ! 328: {SD_RES_NO_BASE_VDI, "No base VDI found"}, ! 329: {SD_RES_VDI_READ, "Failed read the requested VDI"}, ! 330: {SD_RES_VDI_WRITE, "Failed to write the requested VDI"}, ! 331: {SD_RES_BASE_VDI_READ, "Failed to read the base VDI"}, ! 332: {SD_RES_BASE_VDI_WRITE, "Failed to write the base VDI"}, ! 333: {SD_RES_NO_TAG, "Failed to find the requested tag"}, ! 334: {SD_RES_STARTUP, "The system is still booting"}, ! 335: {SD_RES_VDI_NOT_LOCKED, "VDI isn't locked"}, ! 336: {SD_RES_SHUTDOWN, "The system is shutting down"}, ! 337: {SD_RES_NO_MEM, "Out of memory on the server"}, ! 338: {SD_RES_FULL_VDI, "We already have the maximum vdis"}, ! 339: {SD_RES_VER_MISMATCH, "Protocol version mismatch"}, ! 340: {SD_RES_NO_SPACE, "Server has no space for new objects"}, ! 341: {SD_RES_WAIT_FOR_FORMAT, "Sheepdog is waiting for a format operation"}, ! 342: {SD_RES_WAIT_FOR_JOIN, "Sheepdog is waiting for other nodes joining"}, ! 343: {SD_RES_JOIN_FAILED, "Target node had failed to join sheepdog"}, ! 344: }; ! 345: ! 346: for (i = 0; i < ARRAY_SIZE(errors); ++i) { ! 347: if (errors[i].err == err) { ! 348: return errors[i].desc; ! 349: } ! 350: } ! 351: ! 352: return "Invalid error code"; ! 353: } ! 354: ! 355: /* ! 356: * Sheepdog I/O handling: ! 357: * ! 358: * 1. In the sd_aio_readv/writev, read/write requests are added to the ! 359: * QEMU Bottom Halves. ! 360: * ! 361: * 2. In sd_readv_writev_bh_cb, the callbacks of BHs, we send the I/O ! 362: * requests to the server and link the requests to the ! 363: * outstanding_list in the BDRVSheepdogState. we exits the ! 364: * function without waiting for receiving the response. ! 365: * ! 366: * 3. We receive the response in aio_read_response, the fd handler to ! 367: * the sheepdog connection. If metadata update is needed, we send ! 368: * the write request to the vdi object in sd_write_done, the write ! 369: * completion function. The AIOCB callback is not called until all ! 370: * the requests belonging to the AIOCB are finished. ! 371: */ ! 372: ! 373: static inline AIOReq *alloc_aio_req(BDRVSheepdogState *s, SheepdogAIOCB *acb, ! 374: uint64_t oid, unsigned int data_len, ! 375: uint64_t offset, uint8_t flags, ! 376: uint64_t base_oid, unsigned int iov_offset) ! 377: { ! 378: AIOReq *aio_req; ! 379: ! 380: aio_req = qemu_malloc(sizeof(*aio_req)); ! 381: aio_req->aiocb = acb; ! 382: aio_req->iov_offset = iov_offset; ! 383: aio_req->oid = oid; ! 384: aio_req->base_oid = base_oid; ! 385: aio_req->offset = offset; ! 386: aio_req->data_len = data_len; ! 387: aio_req->flags = flags; ! 388: aio_req->id = s->aioreq_seq_num++; ! 389: ! 390: QLIST_INSERT_HEAD(&s->outstanding_aio_head, aio_req, ! 391: outstanding_aio_siblings); ! 392: QLIST_INSERT_HEAD(&acb->aioreq_head, aio_req, aioreq_siblings); ! 393: ! 394: return aio_req; ! 395: } ! 396: ! 397: static inline int free_aio_req(BDRVSheepdogState *s, AIOReq *aio_req) ! 398: { ! 399: SheepdogAIOCB *acb = aio_req->aiocb; ! 400: QLIST_REMOVE(aio_req, outstanding_aio_siblings); ! 401: QLIST_REMOVE(aio_req, aioreq_siblings); ! 402: qemu_free(aio_req); ! 403: ! 404: return !QLIST_EMPTY(&acb->aioreq_head); ! 405: } ! 406: ! 407: static void sd_finish_aiocb(SheepdogAIOCB *acb) ! 408: { ! 409: if (!acb->canceled) { ! 410: acb->common.cb(acb->common.opaque, acb->ret); ! 411: } ! 412: qemu_aio_release(acb); ! 413: } ! 414: ! 415: static void sd_aio_cancel(BlockDriverAIOCB *blockacb) ! 416: { ! 417: SheepdogAIOCB *acb = (SheepdogAIOCB *)blockacb; ! 418: ! 419: /* ! 420: * Sheepdog cannot cancel the requests which are already sent to ! 421: * the servers, so we just complete the request with -EIO here. ! 422: */ ! 423: acb->common.cb(acb->common.opaque, -EIO); ! 424: acb->canceled = 1; ! 425: } ! 426: ! 427: static AIOPool sd_aio_pool = { ! 428: .aiocb_size = sizeof(SheepdogAIOCB), ! 429: .cancel = sd_aio_cancel, ! 430: }; ! 431: ! 432: static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov, ! 433: int64_t sector_num, int nb_sectors, ! 434: BlockDriverCompletionFunc *cb, void *opaque) ! 435: { ! 436: SheepdogAIOCB *acb; ! 437: ! 438: acb = qemu_aio_get(&sd_aio_pool, bs, cb, opaque); ! 439: ! 440: acb->qiov = qiov; ! 441: ! 442: acb->sector_num = sector_num; ! 443: acb->nb_sectors = nb_sectors; ! 444: ! 445: acb->aio_done_func = NULL; ! 446: acb->canceled = 0; ! 447: acb->bh = NULL; ! 448: acb->ret = 0; ! 449: QLIST_INIT(&acb->aioreq_head); ! 450: return acb; ! 451: } ! 452: ! 453: static int sd_schedule_bh(QEMUBHFunc *cb, SheepdogAIOCB *acb) ! 454: { ! 455: if (acb->bh) { ! 456: error_report("bug: %d %d\n", acb->aiocb_type, acb->aiocb_type); ! 457: return -EIO; ! 458: } ! 459: ! 460: acb->bh = qemu_bh_new(cb, acb); ! 461: if (!acb->bh) { ! 462: error_report("oom: %d %d\n", acb->aiocb_type, acb->aiocb_type); ! 463: return -EIO; ! 464: } ! 465: ! 466: qemu_bh_schedule(acb->bh); ! 467: ! 468: return 0; ! 469: } ! 470: ! 471: #ifdef _WIN32 ! 472: ! 473: struct msghdr { ! 474: struct iovec *msg_iov; ! 475: size_t msg_iovlen; ! 476: }; ! 477: ! 478: static ssize_t sendmsg(int s, const struct msghdr *msg, int flags) ! 479: { ! 480: size_t size = 0; ! 481: char *buf, *p; ! 482: int i, ret; ! 483: ! 484: /* count the msg size */ ! 485: for (i = 0; i < msg->msg_iovlen; i++) { ! 486: size += msg->msg_iov[i].iov_len; ! 487: } ! 488: buf = qemu_malloc(size); ! 489: ! 490: p = buf; ! 491: for (i = 0; i < msg->msg_iovlen; i++) { ! 492: memcpy(p, msg->msg_iov[i].iov_base, msg->msg_iov[i].iov_len); ! 493: p += msg->msg_iov[i].iov_len; ! 494: } ! 495: ! 496: ret = send(s, buf, size, flags); ! 497: ! 498: qemu_free(buf); ! 499: return ret; ! 500: } ! 501: ! 502: static ssize_t recvmsg(int s, struct msghdr *msg, int flags) ! 503: { ! 504: size_t size = 0; ! 505: char *buf, *p; ! 506: int i, ret; ! 507: ! 508: /* count the msg size */ ! 509: for (i = 0; i < msg->msg_iovlen; i++) { ! 510: size += msg->msg_iov[i].iov_len; ! 511: } ! 512: buf = qemu_malloc(size); ! 513: ! 514: ret = recv(s, buf, size, flags); ! 515: if (ret < 0) { ! 516: goto out; ! 517: } ! 518: ! 519: p = buf; ! 520: for (i = 0; i < msg->msg_iovlen; i++) { ! 521: memcpy(msg->msg_iov[i].iov_base, p, msg->msg_iov[i].iov_len); ! 522: p += msg->msg_iov[i].iov_len; ! 523: } ! 524: out: ! 525: qemu_free(buf); ! 526: return ret; ! 527: } ! 528: ! 529: #endif ! 530: ! 531: /* ! 532: * Send/recv data with iovec buffers ! 533: * ! 534: * This function send/recv data from/to the iovec buffer directly. ! 535: * The first `offset' bytes in the iovec buffer are skipped and next ! 536: * `len' bytes are used. ! 537: * ! 538: * For example, ! 539: * ! 540: * do_send_recv(sockfd, iov, len, offset, 1); ! 541: * ! 542: * is equals to ! 543: * ! 544: * char *buf = malloc(size); ! 545: * iov_to_buf(iov, iovcnt, buf, offset, size); ! 546: * send(sockfd, buf, size, 0); ! 547: * free(buf); ! 548: */ ! 549: static int do_send_recv(int sockfd, struct iovec *iov, int len, int offset, ! 550: int write) ! 551: { ! 552: struct msghdr msg; ! 553: int ret, diff; ! 554: ! 555: memset(&msg, 0, sizeof(msg)); ! 556: msg.msg_iov = iov; ! 557: msg.msg_iovlen = 1; ! 558: ! 559: len += offset; ! 560: ! 561: while (iov->iov_len < len) { ! 562: len -= iov->iov_len; ! 563: ! 564: iov++; ! 565: msg.msg_iovlen++; ! 566: } ! 567: ! 568: diff = iov->iov_len - len; ! 569: iov->iov_len -= diff; ! 570: ! 571: while (msg.msg_iov->iov_len <= offset) { ! 572: offset -= msg.msg_iov->iov_len; ! 573: ! 574: msg.msg_iov++; ! 575: msg.msg_iovlen--; ! 576: } ! 577: ! 578: msg.msg_iov->iov_base = (char *) msg.msg_iov->iov_base + offset; ! 579: msg.msg_iov->iov_len -= offset; ! 580: ! 581: if (write) { ! 582: ret = sendmsg(sockfd, &msg, 0); ! 583: } else { ! 584: ret = recvmsg(sockfd, &msg, 0); ! 585: } ! 586: ! 587: msg.msg_iov->iov_base = (char *) msg.msg_iov->iov_base - offset; ! 588: msg.msg_iov->iov_len += offset; ! 589: ! 590: iov->iov_len += diff; ! 591: return ret; ! 592: } ! 593: ! 594: static int connect_to_sdog(const char *addr, const char *port) ! 595: { ! 596: char hbuf[NI_MAXHOST], sbuf[NI_MAXSERV]; ! 597: int fd, ret; ! 598: struct addrinfo hints, *res, *res0; ! 599: ! 600: if (!addr) { ! 601: addr = SD_DEFAULT_ADDR; ! 602: port = SD_DEFAULT_PORT; ! 603: } ! 604: ! 605: memset(&hints, 0, sizeof(hints)); ! 606: hints.ai_socktype = SOCK_STREAM; ! 607: ! 608: ret = getaddrinfo(addr, port, &hints, &res0); ! 609: if (ret) { ! 610: error_report("unable to get address info %s, %s\n", ! 611: addr, strerror(errno)); ! 612: return -1; ! 613: } ! 614: ! 615: for (res = res0; res; res = res->ai_next) { ! 616: ret = getnameinfo(res->ai_addr, res->ai_addrlen, hbuf, sizeof(hbuf), ! 617: sbuf, sizeof(sbuf), NI_NUMERICHOST | NI_NUMERICSERV); ! 618: if (ret) { ! 619: continue; ! 620: } ! 621: ! 622: fd = socket(res->ai_family, res->ai_socktype, res->ai_protocol); ! 623: if (fd < 0) { ! 624: continue; ! 625: } ! 626: ! 627: reconnect: ! 628: ret = connect(fd, res->ai_addr, res->ai_addrlen); ! 629: if (ret < 0) { ! 630: if (errno == EINTR) { ! 631: goto reconnect; ! 632: } ! 633: break; ! 634: } ! 635: ! 636: dprintf("connected to %s:%s\n", addr, port); ! 637: goto success; ! 638: } ! 639: fd = -1; ! 640: error_report("failed connect to %s:%s\n", addr, port); ! 641: success: ! 642: freeaddrinfo(res0); ! 643: return fd; ! 644: } ! 645: ! 646: static int do_readv_writev(int sockfd, struct iovec *iov, int len, ! 647: int iov_offset, int write) ! 648: { ! 649: int ret; ! 650: again: ! 651: ret = do_send_recv(sockfd, iov, len, iov_offset, write); ! 652: if (ret < 0) { ! 653: if (errno == EINTR || errno == EAGAIN) { ! 654: goto again; ! 655: } ! 656: error_report("failed to recv a rsp, %s\n", strerror(errno)); ! 657: return 1; ! 658: } ! 659: ! 660: iov_offset += ret; ! 661: len -= ret; ! 662: if (len) { ! 663: goto again; ! 664: } ! 665: ! 666: return 0; ! 667: } ! 668: ! 669: static int do_readv(int sockfd, struct iovec *iov, int len, int iov_offset) ! 670: { ! 671: return do_readv_writev(sockfd, iov, len, iov_offset, 0); ! 672: } ! 673: ! 674: static int do_writev(int sockfd, struct iovec *iov, int len, int iov_offset) ! 675: { ! 676: return do_readv_writev(sockfd, iov, len, iov_offset, 1); ! 677: } ! 678: ! 679: static int do_read_write(int sockfd, void *buf, int len, int write) ! 680: { ! 681: struct iovec iov; ! 682: ! 683: iov.iov_base = buf; ! 684: iov.iov_len = len; ! 685: ! 686: return do_readv_writev(sockfd, &iov, len, 0, write); ! 687: } ! 688: ! 689: static int do_read(int sockfd, void *buf, int len) ! 690: { ! 691: return do_read_write(sockfd, buf, len, 0); ! 692: } ! 693: ! 694: static int do_write(int sockfd, void *buf, int len) ! 695: { ! 696: return do_read_write(sockfd, buf, len, 1); ! 697: } ! 698: ! 699: static int send_req(int sockfd, SheepdogReq *hdr, void *data, ! 700: unsigned int *wlen) ! 701: { ! 702: int ret; ! 703: struct iovec iov[2]; ! 704: ! 705: iov[0].iov_base = hdr; ! 706: iov[0].iov_len = sizeof(*hdr); ! 707: ! 708: if (*wlen) { ! 709: iov[1].iov_base = data; ! 710: iov[1].iov_len = *wlen; ! 711: } ! 712: ! 713: ret = do_writev(sockfd, iov, sizeof(*hdr) + *wlen, 0); ! 714: if (ret) { ! 715: error_report("failed to send a req, %s\n", strerror(errno)); ! 716: ret = -1; ! 717: } ! 718: ! 719: return ret; ! 720: } ! 721: ! 722: static int do_req(int sockfd, SheepdogReq *hdr, void *data, ! 723: unsigned int *wlen, unsigned int *rlen) ! 724: { ! 725: int ret; ! 726: ! 727: ret = send_req(sockfd, hdr, data, wlen); ! 728: if (ret) { ! 729: ret = -1; ! 730: goto out; ! 731: } ! 732: ! 733: ret = do_read(sockfd, hdr, sizeof(*hdr)); ! 734: if (ret) { ! 735: error_report("failed to get a rsp, %s\n", strerror(errno)); ! 736: ret = -1; ! 737: goto out; ! 738: } ! 739: ! 740: if (*rlen > hdr->data_length) { ! 741: *rlen = hdr->data_length; ! 742: } ! 743: ! 744: if (*rlen) { ! 745: ret = do_read(sockfd, data, *rlen); ! 746: if (ret) { ! 747: error_report("failed to get the data, %s\n", strerror(errno)); ! 748: ret = -1; ! 749: goto out; ! 750: } ! 751: } ! 752: ret = 0; ! 753: out: ! 754: return ret; ! 755: } ! 756: ! 757: static int add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, ! 758: struct iovec *iov, int niov, int create, ! 759: enum AIOCBState aiocb_type); ! 760: ! 761: /* ! 762: * This function searchs pending requests to the object `oid', and ! 763: * sends them. ! 764: */ ! 765: static void send_pending_req(BDRVSheepdogState *s, uint64_t oid, uint32_t id) ! 766: { ! 767: AIOReq *aio_req, *next; ! 768: SheepdogAIOCB *acb; ! 769: int ret; ! 770: ! 771: QLIST_FOREACH_SAFE(aio_req, &s->outstanding_aio_head, ! 772: outstanding_aio_siblings, next) { ! 773: if (id == aio_req->id) { ! 774: continue; ! 775: } ! 776: if (aio_req->oid != oid) { ! 777: continue; ! 778: } ! 779: ! 780: acb = aio_req->aiocb; ! 781: ret = add_aio_request(s, aio_req, acb->qiov->iov, ! 782: acb->qiov->niov, 0, acb->aiocb_type); ! 783: if (ret < 0) { ! 784: error_report("add_aio_request is failed\n"); ! 785: free_aio_req(s, aio_req); ! 786: if (QLIST_EMPTY(&acb->aioreq_head)) { ! 787: sd_finish_aiocb(acb); ! 788: } ! 789: } ! 790: } ! 791: } ! 792: ! 793: /* ! 794: * Receive responses of the I/O requests. ! 795: * ! 796: * This function is registered as a fd handler, and called from the ! 797: * main loop when s->fd is ready for reading responses. ! 798: */ ! 799: static void aio_read_response(void *opaque) ! 800: { ! 801: SheepdogObjRsp rsp; ! 802: BDRVSheepdogState *s = opaque; ! 803: int fd = s->fd; ! 804: int ret; ! 805: AIOReq *aio_req = NULL; ! 806: SheepdogAIOCB *acb; ! 807: int rest; ! 808: unsigned long idx; ! 809: ! 810: if (QLIST_EMPTY(&s->outstanding_aio_head)) { ! 811: return; ! 812: } ! 813: ! 814: /* read a header */ ! 815: ret = do_read(fd, &rsp, sizeof(rsp)); ! 816: if (ret) { ! 817: error_report("failed to get the header, %s\n", strerror(errno)); ! 818: return; ! 819: } ! 820: ! 821: /* find the right aio_req from the outstanding_aio list */ ! 822: QLIST_FOREACH(aio_req, &s->outstanding_aio_head, outstanding_aio_siblings) { ! 823: if (aio_req->id == rsp.id) { ! 824: break; ! 825: } ! 826: } ! 827: if (!aio_req) { ! 828: error_report("cannot find aio_req %x\n", rsp.id); ! 829: return; ! 830: } ! 831: ! 832: acb = aio_req->aiocb; ! 833: ! 834: switch (acb->aiocb_type) { ! 835: case AIOCB_WRITE_UDATA: ! 836: if (!is_data_obj(aio_req->oid)) { ! 837: break; ! 838: } ! 839: idx = data_oid_to_idx(aio_req->oid); ! 840: ! 841: if (s->inode.data_vdi_id[idx] != s->inode.vdi_id) { ! 842: /* ! 843: * If the object is newly created one, we need to update ! 844: * the vdi object (metadata object). min_dirty_data_idx ! 845: * and max_dirty_data_idx are changed to include updated ! 846: * index between them. ! 847: */ ! 848: s->inode.data_vdi_id[idx] = s->inode.vdi_id; ! 849: s->max_dirty_data_idx = MAX(idx, s->max_dirty_data_idx); ! 850: s->min_dirty_data_idx = MIN(idx, s->min_dirty_data_idx); ! 851: ! 852: /* ! 853: * Some requests may be blocked because simultaneous ! 854: * create requests are not allowed, so we search the ! 855: * pending requests here. ! 856: */ ! 857: send_pending_req(s, vid_to_data_oid(s->inode.vdi_id, idx), rsp.id); ! 858: } ! 859: break; ! 860: case AIOCB_READ_UDATA: ! 861: ret = do_readv(fd, acb->qiov->iov, rsp.data_length, ! 862: aio_req->iov_offset); ! 863: if (ret) { ! 864: error_report("failed to get the data, %s\n", strerror(errno)); ! 865: return; ! 866: } ! 867: break; ! 868: } ! 869: ! 870: if (rsp.result != SD_RES_SUCCESS) { ! 871: acb->ret = -EIO; ! 872: error_report("%s\n", sd_strerror(rsp.result)); ! 873: } ! 874: ! 875: rest = free_aio_req(s, aio_req); ! 876: if (!rest) { ! 877: /* ! 878: * We've finished all requests which belong to the AIOCB, so ! 879: * we can call the callback now. ! 880: */ ! 881: acb->aio_done_func(acb); ! 882: } ! 883: } ! 884: ! 885: static int aio_flush_request(void *opaque) ! 886: { ! 887: BDRVSheepdogState *s = opaque; ! 888: ! 889: return !QLIST_EMPTY(&s->outstanding_aio_head); ! 890: } ! 891: ! 892: #if !defined(SOL_TCP) || !defined(TCP_CORK) ! 893: ! 894: static int set_cork(int fd, int v) ! 895: { ! 896: return 0; ! 897: } ! 898: ! 899: #else ! 900: ! 901: static int set_cork(int fd, int v) ! 902: { ! 903: return setsockopt(fd, SOL_TCP, TCP_CORK, &v, sizeof(v)); ! 904: } ! 905: ! 906: #endif ! 907: ! 908: static int set_nodelay(int fd) ! 909: { ! 910: int ret, opt; ! 911: ! 912: opt = 1; ! 913: ret = setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, (char *)&opt, sizeof(opt)); ! 914: return ret; ! 915: } ! 916: ! 917: /* ! 918: * Return a socket discriptor to read/write objects. ! 919: * ! 920: * We cannot use this discriptor for other operations because ! 921: * the block driver may be on waiting response from the server. ! 922: */ ! 923: static int get_sheep_fd(BDRVSheepdogState *s) ! 924: { ! 925: int ret, fd; ! 926: ! 927: fd = connect_to_sdog(s->addr, s->port); ! 928: if (fd < 0) { ! 929: error_report("%s\n", strerror(errno)); ! 930: return -1; ! 931: } ! 932: ! 933: socket_set_nonblock(fd); ! 934: ! 935: ret = set_nodelay(fd); ! 936: if (ret) { ! 937: error_report("%s\n", strerror(errno)); ! 938: closesocket(fd); ! 939: return -1; ! 940: } ! 941: ! 942: qemu_aio_set_fd_handler(fd, aio_read_response, NULL, aio_flush_request, ! 943: NULL, s); ! 944: return fd; ! 945: } ! 946: ! 947: /* ! 948: * Parse a filename ! 949: * ! 950: * filename must be one of the following formats: ! 951: * 1. [vdiname] ! 952: * 2. [vdiname]:[snapid] ! 953: * 3. [vdiname]:[tag] ! 954: * 4. [hostname]:[port]:[vdiname] ! 955: * 5. [hostname]:[port]:[vdiname]:[snapid] ! 956: * 6. [hostname]:[port]:[vdiname]:[tag] ! 957: * ! 958: * You can boot from the snapshot images by specifying `snapid` or ! 959: * `tag'. ! 960: * ! 961: * You can run VMs outside the Sheepdog cluster by specifying ! 962: * `hostname' and `port' (experimental). ! 963: */ ! 964: static int parse_vdiname(BDRVSheepdogState *s, const char *filename, ! 965: char *vdi, uint32_t *snapid, char *tag) ! 966: { ! 967: char *p, *q; ! 968: int nr_sep; ! 969: ! 970: p = q = qemu_strdup(filename); ! 971: ! 972: /* count the number of separators */ ! 973: nr_sep = 0; ! 974: while (*p) { ! 975: if (*p == ':') { ! 976: nr_sep++; ! 977: } ! 978: p++; ! 979: } ! 980: p = q; ! 981: ! 982: /* use the first two tokens as hostname and port number. */ ! 983: if (nr_sep >= 2) { ! 984: s->addr = p; ! 985: p = strchr(p, ':'); ! 986: *p++ = '\0'; ! 987: ! 988: s->port = p; ! 989: p = strchr(p, ':'); ! 990: *p++ = '\0'; ! 991: } else { ! 992: s->addr = NULL; ! 993: s->port = 0; ! 994: } ! 995: ! 996: strncpy(vdi, p, SD_MAX_VDI_LEN); ! 997: ! 998: p = strchr(vdi, ':'); ! 999: if (p) { ! 1000: *p++ = '\0'; ! 1001: *snapid = strtoul(p, NULL, 10); ! 1002: if (*snapid == 0) { ! 1003: strncpy(tag, p, SD_MAX_VDI_TAG_LEN); ! 1004: } ! 1005: } else { ! 1006: *snapid = CURRENT_VDI_ID; /* search current vdi */ ! 1007: } ! 1008: ! 1009: if (s->addr == NULL) { ! 1010: qemu_free(q); ! 1011: } ! 1012: ! 1013: return 0; ! 1014: } ! 1015: ! 1016: static int find_vdi_name(BDRVSheepdogState *s, char *filename, uint32_t snapid, ! 1017: char *tag, uint32_t *vid, int for_snapshot) ! 1018: { ! 1019: int ret, fd; ! 1020: SheepdogVdiReq hdr; ! 1021: SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr; ! 1022: unsigned int wlen, rlen = 0; ! 1023: char buf[SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN]; ! 1024: ! 1025: fd = connect_to_sdog(s->addr, s->port); ! 1026: if (fd < 0) { ! 1027: return -1; ! 1028: } ! 1029: ! 1030: memset(buf, 0, sizeof(buf)); ! 1031: strncpy(buf, filename, SD_MAX_VDI_LEN); ! 1032: strncpy(buf + SD_MAX_VDI_LEN, tag, SD_MAX_VDI_TAG_LEN); ! 1033: ! 1034: memset(&hdr, 0, sizeof(hdr)); ! 1035: if (for_snapshot) { ! 1036: hdr.opcode = SD_OP_GET_VDI_INFO; ! 1037: } else { ! 1038: hdr.opcode = SD_OP_LOCK_VDI; ! 1039: } ! 1040: wlen = SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN; ! 1041: hdr.proto_ver = SD_PROTO_VER; ! 1042: hdr.data_length = wlen; ! 1043: hdr.snapid = snapid; ! 1044: hdr.flags = SD_FLAG_CMD_WRITE; ! 1045: ! 1046: ret = do_req(fd, (SheepdogReq *)&hdr, buf, &wlen, &rlen); ! 1047: if (ret) { ! 1048: ret = -1; ! 1049: goto out; ! 1050: } ! 1051: ! 1052: if (rsp->result != SD_RES_SUCCESS) { ! 1053: error_report("cannot get vdi info, %s, %s %d %s\n", ! 1054: sd_strerror(rsp->result), filename, snapid, tag); ! 1055: ret = -1; ! 1056: goto out; ! 1057: } ! 1058: *vid = rsp->vdi_id; ! 1059: ! 1060: ret = 0; ! 1061: out: ! 1062: closesocket(fd); ! 1063: return ret; ! 1064: } ! 1065: ! 1066: static int add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, ! 1067: struct iovec *iov, int niov, int create, ! 1068: enum AIOCBState aiocb_type) ! 1069: { ! 1070: int nr_copies = s->inode.nr_copies; ! 1071: SheepdogObjReq hdr; ! 1072: unsigned int wlen; ! 1073: int ret; ! 1074: uint64_t oid = aio_req->oid; ! 1075: unsigned int datalen = aio_req->data_len; ! 1076: uint64_t offset = aio_req->offset; ! 1077: uint8_t flags = aio_req->flags; ! 1078: uint64_t old_oid = aio_req->base_oid; ! 1079: ! 1080: if (!nr_copies) { ! 1081: error_report("bug\n"); ! 1082: } ! 1083: ! 1084: memset(&hdr, 0, sizeof(hdr)); ! 1085: ! 1086: if (aiocb_type == AIOCB_READ_UDATA) { ! 1087: wlen = 0; ! 1088: hdr.opcode = SD_OP_READ_OBJ; ! 1089: hdr.flags = flags; ! 1090: } else if (create) { ! 1091: wlen = datalen; ! 1092: hdr.opcode = SD_OP_CREATE_AND_WRITE_OBJ; ! 1093: hdr.flags = SD_FLAG_CMD_WRITE | flags; ! 1094: } else { ! 1095: wlen = datalen; ! 1096: hdr.opcode = SD_OP_WRITE_OBJ; ! 1097: hdr.flags = SD_FLAG_CMD_WRITE | flags; ! 1098: } ! 1099: ! 1100: hdr.oid = oid; ! 1101: hdr.cow_oid = old_oid; ! 1102: hdr.copies = s->inode.nr_copies; ! 1103: ! 1104: hdr.data_length = datalen; ! 1105: hdr.offset = offset; ! 1106: ! 1107: hdr.id = aio_req->id; ! 1108: ! 1109: set_cork(s->fd, 1); ! 1110: ! 1111: /* send a header */ ! 1112: ret = do_write(s->fd, &hdr, sizeof(hdr)); ! 1113: if (ret) { ! 1114: error_report("failed to send a req, %s\n", strerror(errno)); ! 1115: return -EIO; ! 1116: } ! 1117: ! 1118: if (wlen) { ! 1119: ret = do_writev(s->fd, iov, wlen, aio_req->iov_offset); ! 1120: if (ret) { ! 1121: error_report("failed to send a data, %s\n", strerror(errno)); ! 1122: return -EIO; ! 1123: } ! 1124: } ! 1125: ! 1126: set_cork(s->fd, 0); ! 1127: ! 1128: return 0; ! 1129: } ! 1130: ! 1131: static int read_write_object(int fd, char *buf, uint64_t oid, int copies, ! 1132: unsigned int datalen, uint64_t offset, ! 1133: int write, int create) ! 1134: { ! 1135: SheepdogObjReq hdr; ! 1136: SheepdogObjRsp *rsp = (SheepdogObjRsp *)&hdr; ! 1137: unsigned int wlen, rlen; ! 1138: int ret; ! 1139: ! 1140: memset(&hdr, 0, sizeof(hdr)); ! 1141: ! 1142: if (write) { ! 1143: wlen = datalen; ! 1144: rlen = 0; ! 1145: hdr.flags = SD_FLAG_CMD_WRITE; ! 1146: if (create) { ! 1147: hdr.opcode = SD_OP_CREATE_AND_WRITE_OBJ; ! 1148: } else { ! 1149: hdr.opcode = SD_OP_WRITE_OBJ; ! 1150: } ! 1151: } else { ! 1152: wlen = 0; ! 1153: rlen = datalen; ! 1154: hdr.opcode = SD_OP_READ_OBJ; ! 1155: } ! 1156: hdr.oid = oid; ! 1157: hdr.data_length = datalen; ! 1158: hdr.offset = offset; ! 1159: hdr.copies = copies; ! 1160: ! 1161: ret = do_req(fd, (SheepdogReq *)&hdr, buf, &wlen, &rlen); ! 1162: if (ret) { ! 1163: error_report("failed to send a request to the sheep\n"); ! 1164: return -1; ! 1165: } ! 1166: ! 1167: switch (rsp->result) { ! 1168: case SD_RES_SUCCESS: ! 1169: return 0; ! 1170: default: ! 1171: error_report("%s\n", sd_strerror(rsp->result)); ! 1172: return -1; ! 1173: } ! 1174: } ! 1175: ! 1176: static int read_object(int fd, char *buf, uint64_t oid, int copies, ! 1177: unsigned int datalen, uint64_t offset) ! 1178: { ! 1179: return read_write_object(fd, buf, oid, copies, datalen, offset, 0, 0); ! 1180: } ! 1181: ! 1182: static int write_object(int fd, char *buf, uint64_t oid, int copies, ! 1183: unsigned int datalen, uint64_t offset, int create) ! 1184: { ! 1185: return read_write_object(fd, buf, oid, copies, datalen, offset, 1, create); ! 1186: } ! 1187: ! 1188: static int sd_open(BlockDriverState *bs, const char *filename, int flags) ! 1189: { ! 1190: int ret, fd; ! 1191: uint32_t vid = 0; ! 1192: BDRVSheepdogState *s = bs->opaque; ! 1193: char vdi[SD_MAX_VDI_LEN], tag[SD_MAX_VDI_TAG_LEN]; ! 1194: uint32_t snapid; ! 1195: char *buf = NULL; ! 1196: ! 1197: strstart(filename, "sheepdog:", (const char **)&filename); ! 1198: ! 1199: QLIST_INIT(&s->outstanding_aio_head); ! 1200: s->fd = -1; ! 1201: ! 1202: memset(vdi, 0, sizeof(vdi)); ! 1203: memset(tag, 0, sizeof(tag)); ! 1204: if (parse_vdiname(s, filename, vdi, &snapid, tag) < 0) { ! 1205: goto out; ! 1206: } ! 1207: s->fd = get_sheep_fd(s); ! 1208: if (s->fd < 0) { ! 1209: goto out; ! 1210: } ! 1211: ! 1212: ret = find_vdi_name(s, vdi, snapid, tag, &vid, 0); ! 1213: if (ret) { ! 1214: goto out; ! 1215: } ! 1216: ! 1217: if (snapid) { ! 1218: dprintf("%" PRIx32 " snapshot inode was open.\n", vid); ! 1219: s->is_snapshot = 1; ! 1220: } ! 1221: ! 1222: fd = connect_to_sdog(s->addr, s->port); ! 1223: if (fd < 0) { ! 1224: error_report("failed to connect\n"); ! 1225: goto out; ! 1226: } ! 1227: ! 1228: buf = qemu_malloc(SD_INODE_SIZE); ! 1229: ret = read_object(fd, buf, vid_to_vdi_oid(vid), 0, SD_INODE_SIZE, 0); ! 1230: ! 1231: closesocket(fd); ! 1232: ! 1233: if (ret) { ! 1234: goto out; ! 1235: } ! 1236: ! 1237: memcpy(&s->inode, buf, sizeof(s->inode)); ! 1238: s->min_dirty_data_idx = UINT32_MAX; ! 1239: s->max_dirty_data_idx = 0; ! 1240: ! 1241: bs->total_sectors = s->inode.vdi_size / SECTOR_SIZE; ! 1242: strncpy(s->name, vdi, sizeof(s->name)); ! 1243: qemu_free(buf); ! 1244: return 0; ! 1245: out: ! 1246: qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL, NULL, NULL); ! 1247: if (s->fd >= 0) { ! 1248: closesocket(s->fd); ! 1249: } ! 1250: qemu_free(buf); ! 1251: return -1; ! 1252: } ! 1253: ! 1254: static int do_sd_create(char *filename, int64_t vdi_size, ! 1255: uint32_t base_vid, uint32_t *vdi_id, int snapshot, ! 1256: const char *addr, const char *port) ! 1257: { ! 1258: SheepdogVdiReq hdr; ! 1259: SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr; ! 1260: int fd, ret; ! 1261: unsigned int wlen, rlen = 0; ! 1262: char buf[SD_MAX_VDI_LEN]; ! 1263: ! 1264: fd = connect_to_sdog(addr, port); ! 1265: if (fd < 0) { ! 1266: return -EIO; ! 1267: } ! 1268: ! 1269: memset(buf, 0, sizeof(buf)); ! 1270: strncpy(buf, filename, SD_MAX_VDI_LEN); ! 1271: ! 1272: memset(&hdr, 0, sizeof(hdr)); ! 1273: hdr.opcode = SD_OP_NEW_VDI; ! 1274: hdr.base_vdi_id = base_vid; ! 1275: ! 1276: wlen = SD_MAX_VDI_LEN; ! 1277: ! 1278: hdr.flags = SD_FLAG_CMD_WRITE; ! 1279: hdr.snapid = snapshot; ! 1280: ! 1281: hdr.data_length = wlen; ! 1282: hdr.vdi_size = vdi_size; ! 1283: ! 1284: ret = do_req(fd, (SheepdogReq *)&hdr, buf, &wlen, &rlen); ! 1285: ! 1286: closesocket(fd); ! 1287: ! 1288: if (ret) { ! 1289: return -EIO; ! 1290: } ! 1291: ! 1292: if (rsp->result != SD_RES_SUCCESS) { ! 1293: error_report("%s, %s\n", sd_strerror(rsp->result), filename); ! 1294: return -EIO; ! 1295: } ! 1296: ! 1297: if (vdi_id) { ! 1298: *vdi_id = rsp->vdi_id; ! 1299: } ! 1300: ! 1301: return 0; ! 1302: } ! 1303: ! 1304: static int sd_create(const char *filename, QEMUOptionParameter *options) ! 1305: { ! 1306: int ret; ! 1307: uint32_t vid = 0; ! 1308: int64_t vdi_size = 0; ! 1309: char *backing_file = NULL; ! 1310: ! 1311: strstart(filename, "sheepdog:", (const char **)&filename); ! 1312: ! 1313: while (options && options->name) { ! 1314: if (!strcmp(options->name, BLOCK_OPT_SIZE)) { ! 1315: vdi_size = options->value.n; ! 1316: } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) { ! 1317: backing_file = options->value.s; ! 1318: } ! 1319: options++; ! 1320: } ! 1321: ! 1322: if (vdi_size > SD_MAX_VDI_SIZE) { ! 1323: error_report("too big image size\n"); ! 1324: return -EINVAL; ! 1325: } ! 1326: ! 1327: if (backing_file) { ! 1328: BlockDriverState *bs; ! 1329: BDRVSheepdogState *s; ! 1330: BlockDriver *drv; ! 1331: ! 1332: /* Currently, only Sheepdog backing image is supported. */ ! 1333: drv = bdrv_find_protocol(backing_file); ! 1334: if (!drv || strcmp(drv->protocol_name, "sheepdog") != 0) { ! 1335: error_report("backing_file must be a sheepdog image\n"); ! 1336: return -EINVAL; ! 1337: } ! 1338: ! 1339: ret = bdrv_file_open(&bs, backing_file, 0); ! 1340: if (ret < 0) ! 1341: return -EIO; ! 1342: ! 1343: s = bs->opaque; ! 1344: ! 1345: if (!is_snapshot(&s->inode)) { ! 1346: error_report("cannot clone from a non snapshot vdi\n"); ! 1347: bdrv_delete(bs); ! 1348: return -EINVAL; ! 1349: } ! 1350: ! 1351: vid = s->inode.vdi_id; ! 1352: bdrv_delete(bs); ! 1353: } ! 1354: ! 1355: return do_sd_create((char *)filename, vdi_size, vid, NULL, 0, NULL, NULL); ! 1356: } ! 1357: ! 1358: static void sd_close(BlockDriverState *bs) ! 1359: { ! 1360: BDRVSheepdogState *s = bs->opaque; ! 1361: SheepdogVdiReq hdr; ! 1362: SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr; ! 1363: unsigned int wlen, rlen = 0; ! 1364: int fd, ret; ! 1365: ! 1366: dprintf("%s\n", s->name); ! 1367: ! 1368: fd = connect_to_sdog(s->addr, s->port); ! 1369: if (fd < 0) { ! 1370: return; ! 1371: } ! 1372: ! 1373: memset(&hdr, 0, sizeof(hdr)); ! 1374: ! 1375: hdr.opcode = SD_OP_RELEASE_VDI; ! 1376: wlen = strlen(s->name) + 1; ! 1377: hdr.data_length = wlen; ! 1378: hdr.flags = SD_FLAG_CMD_WRITE; ! 1379: ! 1380: ret = do_req(fd, (SheepdogReq *)&hdr, s->name, &wlen, &rlen); ! 1381: ! 1382: closesocket(fd); ! 1383: ! 1384: if (!ret && rsp->result != SD_RES_SUCCESS && ! 1385: rsp->result != SD_RES_VDI_NOT_LOCKED) { ! 1386: error_report("%s, %s\n", sd_strerror(rsp->result), s->name); ! 1387: } ! 1388: ! 1389: qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL, NULL, NULL); ! 1390: closesocket(s->fd); ! 1391: qemu_free(s->addr); ! 1392: } ! 1393: ! 1394: static int64_t sd_getlength(BlockDriverState *bs) ! 1395: { ! 1396: BDRVSheepdogState *s = bs->opaque; ! 1397: ! 1398: return s->inode.vdi_size; ! 1399: } ! 1400: ! 1401: static int sd_truncate(BlockDriverState *bs, int64_t offset) ! 1402: { ! 1403: BDRVSheepdogState *s = bs->opaque; ! 1404: int ret, fd; ! 1405: unsigned int datalen; ! 1406: ! 1407: if (offset < s->inode.vdi_size) { ! 1408: error_report("shrinking is not supported\n"); ! 1409: return -EINVAL; ! 1410: } else if (offset > SD_MAX_VDI_SIZE) { ! 1411: error_report("too big image size\n"); ! 1412: return -EINVAL; ! 1413: } ! 1414: ! 1415: fd = connect_to_sdog(s->addr, s->port); ! 1416: if (fd < 0) { ! 1417: return -EIO; ! 1418: } ! 1419: ! 1420: /* we don't need to update entire object */ ! 1421: datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id); ! 1422: s->inode.vdi_size = offset; ! 1423: ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id), ! 1424: s->inode.nr_copies, datalen, 0, 0); ! 1425: close(fd); ! 1426: ! 1427: if (ret < 0) { ! 1428: error_report("failed to update an inode.\n"); ! 1429: return -EIO; ! 1430: } ! 1431: ! 1432: return 0; ! 1433: } ! 1434: ! 1435: /* ! 1436: * This function is called after writing data objects. If we need to ! 1437: * update metadata, this sends a write request to the vdi object. ! 1438: * Otherwise, this calls the AIOCB callback. ! 1439: */ ! 1440: static void sd_write_done(SheepdogAIOCB *acb) ! 1441: { ! 1442: int ret; ! 1443: BDRVSheepdogState *s = acb->common.bs->opaque; ! 1444: struct iovec iov; ! 1445: AIOReq *aio_req; ! 1446: uint32_t offset, data_len, mn, mx; ! 1447: ! 1448: mn = s->min_dirty_data_idx; ! 1449: mx = s->max_dirty_data_idx; ! 1450: if (mn <= mx) { ! 1451: /* we need to update the vdi object. */ ! 1452: offset = sizeof(s->inode) - sizeof(s->inode.data_vdi_id) + ! 1453: mn * sizeof(s->inode.data_vdi_id[0]); ! 1454: data_len = (mx - mn + 1) * sizeof(s->inode.data_vdi_id[0]); ! 1455: ! 1456: s->min_dirty_data_idx = UINT32_MAX; ! 1457: s->max_dirty_data_idx = 0; ! 1458: ! 1459: iov.iov_base = &s->inode; ! 1460: iov.iov_len = sizeof(s->inode); ! 1461: aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id), ! 1462: data_len, offset, 0, 0, offset); ! 1463: ret = add_aio_request(s, aio_req, &iov, 1, 0, AIOCB_WRITE_UDATA); ! 1464: if (ret) { ! 1465: free_aio_req(s, aio_req); ! 1466: acb->ret = -EIO; ! 1467: goto out; ! 1468: } ! 1469: ! 1470: acb->aio_done_func = sd_finish_aiocb; ! 1471: acb->aiocb_type = AIOCB_WRITE_UDATA; ! 1472: return; ! 1473: } ! 1474: out: ! 1475: sd_finish_aiocb(acb); ! 1476: } ! 1477: ! 1478: /* ! 1479: * Create a writable VDI from a snapshot ! 1480: */ ! 1481: static int sd_create_branch(BDRVSheepdogState *s) ! 1482: { ! 1483: int ret, fd; ! 1484: uint32_t vid; ! 1485: char *buf; ! 1486: ! 1487: dprintf("%" PRIx32 " is snapshot.\n", s->inode.vdi_id); ! 1488: ! 1489: buf = qemu_malloc(SD_INODE_SIZE); ! 1490: ! 1491: ret = do_sd_create(s->name, s->inode.vdi_size, s->inode.vdi_id, &vid, 1, ! 1492: s->addr, s->port); ! 1493: if (ret) { ! 1494: goto out; ! 1495: } ! 1496: ! 1497: dprintf("%" PRIx32 " is created.\n", vid); ! 1498: ! 1499: fd = connect_to_sdog(s->addr, s->port); ! 1500: if (fd < 0) { ! 1501: error_report("failed to connect\n"); ! 1502: goto out; ! 1503: } ! 1504: ! 1505: ret = read_object(fd, buf, vid_to_vdi_oid(vid), s->inode.nr_copies, ! 1506: SD_INODE_SIZE, 0); ! 1507: ! 1508: closesocket(fd); ! 1509: ! 1510: if (ret < 0) { ! 1511: goto out; ! 1512: } ! 1513: ! 1514: memcpy(&s->inode, buf, sizeof(s->inode)); ! 1515: ! 1516: s->is_snapshot = 0; ! 1517: ret = 0; ! 1518: dprintf("%" PRIx32 " was newly created.\n", s->inode.vdi_id); ! 1519: ! 1520: out: ! 1521: qemu_free(buf); ! 1522: ! 1523: return ret; ! 1524: } ! 1525: ! 1526: /* ! 1527: * Send I/O requests to the server. ! 1528: * ! 1529: * This function sends requests to the server, links the requests to ! 1530: * the outstanding_list in BDRVSheepdogState, and exits without ! 1531: * waiting the response. The responses are received in the ! 1532: * `aio_read_response' function which is called from the main loop as ! 1533: * a fd handler. ! 1534: */ ! 1535: static void sd_readv_writev_bh_cb(void *p) ! 1536: { ! 1537: SheepdogAIOCB *acb = p; ! 1538: int ret = 0; ! 1539: unsigned long len, done = 0, total = acb->nb_sectors * SECTOR_SIZE; ! 1540: unsigned long idx = acb->sector_num * SECTOR_SIZE / SD_DATA_OBJ_SIZE; ! 1541: uint64_t oid; ! 1542: uint64_t offset = (acb->sector_num * SECTOR_SIZE) % SD_DATA_OBJ_SIZE; ! 1543: BDRVSheepdogState *s = acb->common.bs->opaque; ! 1544: SheepdogInode *inode = &s->inode; ! 1545: AIOReq *aio_req; ! 1546: ! 1547: qemu_bh_delete(acb->bh); ! 1548: acb->bh = NULL; ! 1549: ! 1550: if (acb->aiocb_type == AIOCB_WRITE_UDATA && s->is_snapshot) { ! 1551: /* ! 1552: * In the case we open the snapshot VDI, Sheepdog creates the ! 1553: * writable VDI when we do a write operation first. ! 1554: */ ! 1555: ret = sd_create_branch(s); ! 1556: if (ret) { ! 1557: acb->ret = -EIO; ! 1558: goto out; ! 1559: } ! 1560: } ! 1561: ! 1562: while (done != total) { ! 1563: uint8_t flags = 0; ! 1564: uint64_t old_oid = 0; ! 1565: int create = 0; ! 1566: ! 1567: oid = vid_to_data_oid(inode->data_vdi_id[idx], idx); ! 1568: ! 1569: len = MIN(total - done, SD_DATA_OBJ_SIZE - offset); ! 1570: ! 1571: if (!inode->data_vdi_id[idx]) { ! 1572: if (acb->aiocb_type == AIOCB_READ_UDATA) { ! 1573: goto done; ! 1574: } ! 1575: ! 1576: create = 1; ! 1577: } else if (acb->aiocb_type == AIOCB_WRITE_UDATA ! 1578: && !is_data_obj_writeable(inode, idx)) { ! 1579: /* Copy-On-Write */ ! 1580: create = 1; ! 1581: old_oid = oid; ! 1582: flags = SD_FLAG_CMD_COW; ! 1583: } ! 1584: ! 1585: if (create) { ! 1586: dprintf("update ino (%" PRIu32") %" PRIu64 " %" PRIu64 ! 1587: " %" PRIu64 "\n", inode->vdi_id, oid, ! 1588: vid_to_data_oid(inode->data_vdi_id[idx], idx), idx); ! 1589: oid = vid_to_data_oid(inode->vdi_id, idx); ! 1590: dprintf("new oid %lx\n", oid); ! 1591: } ! 1592: ! 1593: aio_req = alloc_aio_req(s, acb, oid, len, offset, flags, old_oid, done); ! 1594: ! 1595: if (create) { ! 1596: AIOReq *areq; ! 1597: QLIST_FOREACH(areq, &s->outstanding_aio_head, ! 1598: outstanding_aio_siblings) { ! 1599: if (areq == aio_req) { ! 1600: continue; ! 1601: } ! 1602: if (areq->oid == oid) { ! 1603: /* ! 1604: * Sheepdog cannot handle simultaneous create ! 1605: * requests to the same object. So we cannot send ! 1606: * the request until the previous request ! 1607: * finishes. ! 1608: */ ! 1609: aio_req->flags = 0; ! 1610: aio_req->base_oid = 0; ! 1611: goto done; ! 1612: } ! 1613: } ! 1614: } ! 1615: ! 1616: ret = add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, ! 1617: create, acb->aiocb_type); ! 1618: if (ret < 0) { ! 1619: error_report("add_aio_request is failed\n"); ! 1620: free_aio_req(s, aio_req); ! 1621: acb->ret = -EIO; ! 1622: goto out; ! 1623: } ! 1624: done: ! 1625: offset = 0; ! 1626: idx++; ! 1627: done += len; ! 1628: } ! 1629: out: ! 1630: if (QLIST_EMPTY(&acb->aioreq_head)) { ! 1631: sd_finish_aiocb(acb); ! 1632: } ! 1633: } ! 1634: ! 1635: static BlockDriverAIOCB *sd_aio_writev(BlockDriverState *bs, int64_t sector_num, ! 1636: QEMUIOVector *qiov, int nb_sectors, ! 1637: BlockDriverCompletionFunc *cb, ! 1638: void *opaque) ! 1639: { ! 1640: SheepdogAIOCB *acb; ! 1641: ! 1642: if (bs->growable && sector_num + nb_sectors > bs->total_sectors) { ! 1643: /* TODO: shouldn't block here */ ! 1644: if (sd_truncate(bs, (sector_num + nb_sectors) * SECTOR_SIZE) < 0) { ! 1645: return NULL; ! 1646: } ! 1647: bs->total_sectors = sector_num + nb_sectors; ! 1648: } ! 1649: ! 1650: acb = sd_aio_setup(bs, qiov, sector_num, nb_sectors, cb, opaque); ! 1651: acb->aio_done_func = sd_write_done; ! 1652: acb->aiocb_type = AIOCB_WRITE_UDATA; ! 1653: ! 1654: sd_schedule_bh(sd_readv_writev_bh_cb, acb); ! 1655: return &acb->common; ! 1656: } ! 1657: ! 1658: static BlockDriverAIOCB *sd_aio_readv(BlockDriverState *bs, int64_t sector_num, ! 1659: QEMUIOVector *qiov, int nb_sectors, ! 1660: BlockDriverCompletionFunc *cb, ! 1661: void *opaque) ! 1662: { ! 1663: SheepdogAIOCB *acb; ! 1664: int i; ! 1665: ! 1666: acb = sd_aio_setup(bs, qiov, sector_num, nb_sectors, cb, opaque); ! 1667: acb->aiocb_type = AIOCB_READ_UDATA; ! 1668: acb->aio_done_func = sd_finish_aiocb; ! 1669: ! 1670: /* ! 1671: * TODO: we can do better; we don't need to initialize ! 1672: * blindly. ! 1673: */ ! 1674: for (i = 0; i < qiov->niov; i++) { ! 1675: memset(qiov->iov[i].iov_base, 0, qiov->iov[i].iov_len); ! 1676: } ! 1677: ! 1678: sd_schedule_bh(sd_readv_writev_bh_cb, acb); ! 1679: return &acb->common; ! 1680: } ! 1681: ! 1682: static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) ! 1683: { ! 1684: BDRVSheepdogState *s = bs->opaque; ! 1685: int ret, fd; ! 1686: uint32_t new_vid; ! 1687: SheepdogInode *inode; ! 1688: unsigned int datalen; ! 1689: ! 1690: dprintf("sn_info: name %s id_str %s s: name %s vm_state_size %d " ! 1691: "is_snapshot %d\n", sn_info->name, sn_info->id_str, ! 1692: s->name, sn_info->vm_state_size, s->is_snapshot); ! 1693: ! 1694: if (s->is_snapshot) { ! 1695: error_report("You can't create a snapshot of a snapshot VDI, " ! 1696: "%s (%" PRIu32 ").\n", s->name, s->inode.vdi_id); ! 1697: ! 1698: return -EINVAL; ! 1699: } ! 1700: ! 1701: dprintf("%s %s\n", sn_info->name, sn_info->id_str); ! 1702: ! 1703: s->inode.vm_state_size = sn_info->vm_state_size; ! 1704: s->inode.vm_clock_nsec = sn_info->vm_clock_nsec; ! 1705: strncpy(s->inode.tag, sn_info->name, sizeof(s->inode.tag)); ! 1706: /* we don't need to update entire object */ ! 1707: datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id); ! 1708: ! 1709: /* refresh inode. */ ! 1710: fd = connect_to_sdog(s->addr, s->port); ! 1711: if (fd < 0) { ! 1712: ret = -EIO; ! 1713: goto cleanup; ! 1714: } ! 1715: ! 1716: ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id), ! 1717: s->inode.nr_copies, datalen, 0, 0); ! 1718: if (ret < 0) { ! 1719: error_report("failed to write snapshot's inode.\n"); ! 1720: ret = -EIO; ! 1721: goto cleanup; ! 1722: } ! 1723: ! 1724: ret = do_sd_create(s->name, s->inode.vdi_size, s->inode.vdi_id, &new_vid, 1, ! 1725: s->addr, s->port); ! 1726: if (ret < 0) { ! 1727: error_report("failed to create inode for snapshot. %s\n", ! 1728: strerror(errno)); ! 1729: ret = -EIO; ! 1730: goto cleanup; ! 1731: } ! 1732: ! 1733: inode = (SheepdogInode *)qemu_malloc(datalen); ! 1734: ! 1735: ret = read_object(fd, (char *)inode, vid_to_vdi_oid(new_vid), ! 1736: s->inode.nr_copies, datalen, 0); ! 1737: ! 1738: if (ret < 0) { ! 1739: error_report("failed to read new inode info. %s\n", strerror(errno)); ! 1740: ret = -EIO; ! 1741: goto cleanup; ! 1742: } ! 1743: ! 1744: memcpy(&s->inode, inode, datalen); ! 1745: dprintf("s->inode: name %s snap_id %x oid %x\n", ! 1746: s->inode.name, s->inode.snap_id, s->inode.vdi_id); ! 1747: ! 1748: cleanup: ! 1749: closesocket(fd); ! 1750: return ret; ! 1751: } ! 1752: ! 1753: static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id) ! 1754: { ! 1755: BDRVSheepdogState *s = bs->opaque; ! 1756: BDRVSheepdogState *old_s; ! 1757: char vdi[SD_MAX_VDI_LEN], tag[SD_MAX_VDI_TAG_LEN]; ! 1758: char *buf = NULL; ! 1759: uint32_t vid; ! 1760: uint32_t snapid = 0; ! 1761: int ret = -ENOENT, fd; ! 1762: ! 1763: old_s = qemu_malloc(sizeof(BDRVSheepdogState)); ! 1764: ! 1765: memcpy(old_s, s, sizeof(BDRVSheepdogState)); ! 1766: ! 1767: memset(vdi, 0, sizeof(vdi)); ! 1768: strncpy(vdi, s->name, sizeof(vdi)); ! 1769: ! 1770: memset(tag, 0, sizeof(tag)); ! 1771: snapid = strtoul(snapshot_id, NULL, 10); ! 1772: if (!snapid) { ! 1773: strncpy(tag, s->name, sizeof(tag)); ! 1774: } ! 1775: ! 1776: ret = find_vdi_name(s, vdi, snapid, tag, &vid, 1); ! 1777: if (ret) { ! 1778: error_report("Failed to find_vdi_name\n"); ! 1779: ret = -ENOENT; ! 1780: goto out; ! 1781: } ! 1782: ! 1783: fd = connect_to_sdog(s->addr, s->port); ! 1784: if (fd < 0) { ! 1785: error_report("failed to connect\n"); ! 1786: goto out; ! 1787: } ! 1788: ! 1789: buf = qemu_malloc(SD_INODE_SIZE); ! 1790: ret = read_object(fd, buf, vid_to_vdi_oid(vid), s->inode.nr_copies, ! 1791: SD_INODE_SIZE, 0); ! 1792: ! 1793: closesocket(fd); ! 1794: ! 1795: if (ret) { ! 1796: ret = -ENOENT; ! 1797: goto out; ! 1798: } ! 1799: ! 1800: memcpy(&s->inode, buf, sizeof(s->inode)); ! 1801: ! 1802: if (!s->inode.vm_state_size) { ! 1803: error_report("Invalid snapshot\n"); ! 1804: ret = -ENOENT; ! 1805: goto out; ! 1806: } ! 1807: ! 1808: s->is_snapshot = 1; ! 1809: ! 1810: qemu_free(buf); ! 1811: qemu_free(old_s); ! 1812: ! 1813: return 0; ! 1814: out: ! 1815: /* recover bdrv_sd_state */ ! 1816: memcpy(s, old_s, sizeof(BDRVSheepdogState)); ! 1817: qemu_free(buf); ! 1818: qemu_free(old_s); ! 1819: ! 1820: error_report("failed to open. recover old bdrv_sd_state.\n"); ! 1821: ! 1822: return ret; ! 1823: } ! 1824: ! 1825: static int sd_snapshot_delete(BlockDriverState *bs, const char *snapshot_id) ! 1826: { ! 1827: /* FIXME: Delete specified snapshot id. */ ! 1828: return 0; ! 1829: } ! 1830: ! 1831: #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) ! 1832: #define BITS_PER_BYTE 8 ! 1833: #define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long)) ! 1834: #define DECLARE_BITMAP(name,bits) \ ! 1835: unsigned long name[BITS_TO_LONGS(bits)] ! 1836: ! 1837: #define BITS_PER_LONG (BITS_PER_BYTE * sizeof(long)) ! 1838: ! 1839: static inline int test_bit(unsigned int nr, const unsigned long *addr) ! 1840: { ! 1841: return ((1UL << (nr % BITS_PER_LONG)) & ! 1842: (((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0; ! 1843: } ! 1844: ! 1845: static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab) ! 1846: { ! 1847: BDRVSheepdogState *s = bs->opaque; ! 1848: SheepdogReq req; ! 1849: int fd, nr = 1024, ret, max = BITS_TO_LONGS(SD_NR_VDIS) * sizeof(long); ! 1850: QEMUSnapshotInfo *sn_tab = NULL; ! 1851: unsigned wlen, rlen; ! 1852: int found = 0; ! 1853: static SheepdogInode inode; ! 1854: unsigned long *vdi_inuse; ! 1855: unsigned int start_nr; ! 1856: uint64_t hval; ! 1857: uint32_t vid; ! 1858: ! 1859: vdi_inuse = qemu_malloc(max); ! 1860: ! 1861: fd = connect_to_sdog(s->addr, s->port); ! 1862: if (fd < 0) { ! 1863: goto out; ! 1864: } ! 1865: ! 1866: rlen = max; ! 1867: wlen = 0; ! 1868: ! 1869: memset(&req, 0, sizeof(req)); ! 1870: ! 1871: req.opcode = SD_OP_READ_VDIS; ! 1872: req.data_length = max; ! 1873: ! 1874: ret = do_req(fd, (SheepdogReq *)&req, vdi_inuse, &wlen, &rlen); ! 1875: ! 1876: closesocket(fd); ! 1877: if (ret) { ! 1878: goto out; ! 1879: } ! 1880: ! 1881: sn_tab = qemu_mallocz(nr * sizeof(*sn_tab)); ! 1882: ! 1883: /* calculate a vdi id with hash function */ ! 1884: hval = fnv_64a_buf(s->name, strlen(s->name), FNV1A_64_INIT); ! 1885: start_nr = hval & (SD_NR_VDIS - 1); ! 1886: ! 1887: fd = connect_to_sdog(s->addr, s->port); ! 1888: if (fd < 0) { ! 1889: error_report("failed to connect\n"); ! 1890: goto out; ! 1891: } ! 1892: ! 1893: for (vid = start_nr; found < nr; vid = (vid + 1) % SD_NR_VDIS) { ! 1894: if (!test_bit(vid, vdi_inuse)) { ! 1895: break; ! 1896: } ! 1897: ! 1898: /* we don't need to read entire object */ ! 1899: ret = read_object(fd, (char *)&inode, vid_to_vdi_oid(vid), ! 1900: 0, SD_INODE_SIZE - sizeof(inode.data_vdi_id), 0); ! 1901: ! 1902: if (ret) { ! 1903: continue; ! 1904: } ! 1905: ! 1906: if (!strcmp(inode.name, s->name) && is_snapshot(&inode)) { ! 1907: sn_tab[found].date_sec = inode.snap_ctime >> 32; ! 1908: sn_tab[found].date_nsec = inode.snap_ctime & 0xffffffff; ! 1909: sn_tab[found].vm_state_size = inode.vm_state_size; ! 1910: sn_tab[found].vm_clock_nsec = inode.vm_clock_nsec; ! 1911: ! 1912: snprintf(sn_tab[found].id_str, sizeof(sn_tab[found].id_str), "%u", ! 1913: inode.snap_id); ! 1914: strncpy(sn_tab[found].name, inode.tag, ! 1915: MIN(sizeof(sn_tab[found].name), sizeof(inode.tag))); ! 1916: found++; ! 1917: } ! 1918: } ! 1919: ! 1920: closesocket(fd); ! 1921: out: ! 1922: *psn_tab = sn_tab; ! 1923: ! 1924: qemu_free(vdi_inuse); ! 1925: ! 1926: return found; ! 1927: } ! 1928: ! 1929: static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data, ! 1930: int64_t pos, int size, int load) ! 1931: { ! 1932: int fd, create; ! 1933: int ret = 0; ! 1934: unsigned int data_len; ! 1935: uint64_t vmstate_oid; ! 1936: uint32_t vdi_index; ! 1937: uint64_t offset; ! 1938: ! 1939: fd = connect_to_sdog(s->addr, s->port); ! 1940: if (fd < 0) { ! 1941: ret = -EIO; ! 1942: goto cleanup; ! 1943: } ! 1944: ! 1945: while (size) { ! 1946: vdi_index = pos / SD_DATA_OBJ_SIZE; ! 1947: offset = pos % SD_DATA_OBJ_SIZE; ! 1948: ! 1949: data_len = MIN(size, SD_DATA_OBJ_SIZE); ! 1950: ! 1951: vmstate_oid = vid_to_vmstate_oid(s->inode.vdi_id, vdi_index); ! 1952: ! 1953: create = (offset == 0); ! 1954: if (load) { ! 1955: ret = read_object(fd, (char *)data, vmstate_oid, ! 1956: s->inode.nr_copies, data_len, offset); ! 1957: } else { ! 1958: ret = write_object(fd, (char *)data, vmstate_oid, ! 1959: s->inode.nr_copies, data_len, offset, create); ! 1960: } ! 1961: ! 1962: if (ret < 0) { ! 1963: error_report("failed to save vmstate %s\n", strerror(errno)); ! 1964: ret = -EIO; ! 1965: goto cleanup; ! 1966: } ! 1967: ! 1968: pos += data_len; ! 1969: size -= data_len; ! 1970: ret += data_len; ! 1971: } ! 1972: cleanup: ! 1973: closesocket(fd); ! 1974: return ret; ! 1975: } ! 1976: ! 1977: static int sd_save_vmstate(BlockDriverState *bs, const uint8_t *data, ! 1978: int64_t pos, int size) ! 1979: { ! 1980: BDRVSheepdogState *s = bs->opaque; ! 1981: ! 1982: return do_load_save_vmstate(s, (uint8_t *)data, pos, size, 0); ! 1983: } ! 1984: ! 1985: static int sd_load_vmstate(BlockDriverState *bs, uint8_t *data, ! 1986: int64_t pos, int size) ! 1987: { ! 1988: BDRVSheepdogState *s = bs->opaque; ! 1989: ! 1990: return do_load_save_vmstate(s, data, pos, size, 1); ! 1991: } ! 1992: ! 1993: ! 1994: static QEMUOptionParameter sd_create_options[] = { ! 1995: { ! 1996: .name = BLOCK_OPT_SIZE, ! 1997: .type = OPT_SIZE, ! 1998: .help = "Virtual disk size" ! 1999: }, ! 2000: { ! 2001: .name = BLOCK_OPT_BACKING_FILE, ! 2002: .type = OPT_STRING, ! 2003: .help = "File name of a base image" ! 2004: }, ! 2005: { NULL } ! 2006: }; ! 2007: ! 2008: BlockDriver bdrv_sheepdog = { ! 2009: .format_name = "sheepdog", ! 2010: .protocol_name = "sheepdog", ! 2011: .instance_size = sizeof(BDRVSheepdogState), ! 2012: .bdrv_file_open = sd_open, ! 2013: .bdrv_close = sd_close, ! 2014: .bdrv_create = sd_create, ! 2015: .bdrv_getlength = sd_getlength, ! 2016: .bdrv_truncate = sd_truncate, ! 2017: ! 2018: .bdrv_aio_readv = sd_aio_readv, ! 2019: .bdrv_aio_writev = sd_aio_writev, ! 2020: ! 2021: .bdrv_snapshot_create = sd_snapshot_create, ! 2022: .bdrv_snapshot_goto = sd_snapshot_goto, ! 2023: .bdrv_snapshot_delete = sd_snapshot_delete, ! 2024: .bdrv_snapshot_list = sd_snapshot_list, ! 2025: ! 2026: .bdrv_save_vmstate = sd_save_vmstate, ! 2027: .bdrv_load_vmstate = sd_load_vmstate, ! 2028: ! 2029: .create_options = sd_create_options, ! 2030: }; ! 2031: ! 2032: static void bdrv_sheepdog_init(void) ! 2033: { ! 2034: bdrv_register(&bdrv_sheepdog); ! 2035: } ! 2036: block_init(bdrv_sheepdog_init);
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.