|
|
1.1 root 1: /*
2: * Copyright (C) 2009-2010 Nippon Telegraph and Telephone Corporation.
3: *
4: * This program is free software; you can redistribute it and/or
5: * modify it under the terms of the GNU General Public License version
6: * 2 as published by the Free Software Foundation.
7: *
8: * You should have received a copy of the GNU General Public License
9: * along with this program. If not, see <http://www.gnu.org/licenses/>.
10: */
11:
12: #include "qemu-common.h"
13: #include "qemu-error.h"
14: #include "qemu_socket.h"
15: #include "block_int.h"
1.1.1.3 ! root 16: #include "bitops.h"
1.1 root 17:
/* Version of the Sheepdog protocol placed in request headers. */
#define SD_PROTO_VER 0x01

/* Server endpoint used by connect_to_sdog() when no address is given. */
#define SD_DEFAULT_ADDR "localhost"
#define SD_DEFAULT_PORT "7000"

/* Opcodes of object requests. */
#define SD_OP_CREATE_AND_WRITE_OBJ  0x01
#define SD_OP_READ_OBJ              0x02
#define SD_OP_WRITE_OBJ             0x03

/* Opcodes of vdi requests. */
#define SD_OP_NEW_VDI               0x11
#define SD_OP_LOCK_VDI              0x12
#define SD_OP_RELEASE_VDI           0x13
#define SD_OP_GET_VDI_INFO          0x14
#define SD_OP_READ_VDIS             0x15

/* Bits of the `flags' field of a request header. */
#define SD_FLAG_CMD_WRITE           0x01
#define SD_FLAG_CMD_COW             0x02

/* Values of the `result' field of a response header. */
#define SD_RES_SUCCESS              0x00 /* Success */
#define SD_RES_UNKNOWN              0x01 /* Unknown error */
#define SD_RES_NO_OBJ               0x02 /* No object found */
#define SD_RES_EIO                  0x03 /* I/O error */
#define SD_RES_VDI_EXIST            0x04 /* Vdi exists already */
#define SD_RES_INVALID_PARMS        0x05 /* Invalid parameters */
#define SD_RES_SYSTEM_ERROR         0x06 /* System error */
#define SD_RES_VDI_LOCKED           0x07 /* Vdi is locked */
#define SD_RES_NO_VDI               0x08 /* No vdi found */
#define SD_RES_NO_BASE_VDI          0x09 /* No base vdi found */
#define SD_RES_VDI_READ             0x0A /* Cannot read requested vdi */
#define SD_RES_VDI_WRITE            0x0B /* Cannot write requested vdi */
#define SD_RES_BASE_VDI_READ        0x0C /* Cannot read base vdi */
#define SD_RES_BASE_VDI_WRITE       0x0D /* Cannot write base vdi */
#define SD_RES_NO_TAG               0x0E /* Requested tag is not found */
#define SD_RES_STARTUP              0x0F /* Sheepdog is starting up */
#define SD_RES_VDI_NOT_LOCKED       0x10 /* Vdi is not locked */
#define SD_RES_SHUTDOWN             0x11 /* Sheepdog is shutting down */
#define SD_RES_NO_MEM               0x12 /* Cannot allocate memory */
#define SD_RES_FULL_VDI             0x13 /* We already have the maximum vdis */
#define SD_RES_VER_MISMATCH         0x14 /* Protocol version mismatch */
#define SD_RES_NO_SPACE             0x15 /* Server has no room for new objects */
#define SD_RES_WAIT_FOR_FORMAT      0x16 /* Waiting for a format operation */
#define SD_RES_WAIT_FOR_JOIN        0x17 /* Waiting for other nodes joining */
#define SD_RES_JOIN_FAILED          0x18 /* Target node had failed to join sheepdog */
61:
/*
 * Object ID rules
 *
 *  0 - 19 (20 bits): data object space
 * 20 - 31 (12 bits): reserved data object space
 * 32 - 55 (24 bits): vdi object space
 * 56 - 59 ( 4 bits): reserved vdi object space
 * 60 - 63 ( 4 bits): object type identifier space
 */

/* Bit position at which the vdi id is stored inside an object id. */
#define VDI_SPACE_SHIFT     32
/* Object type bits: vdi (inode) object and VM state object. */
#define VDI_BIT             (UINT64_C(1) << 63)
#define VMSTATE_BIT         (UINT64_C(1) << 62)

/* Limits of the inode layout and of the protocol. */
#define MAX_DATA_OBJS       (UINT64_C(1) << 20)
#define MAX_CHILDREN        1024
#define SD_MAX_VDI_LEN      256
#define SD_MAX_VDI_TAG_LEN  256
#define SD_NR_VDIS          (1U << 24)
#define SD_DATA_OBJ_SIZE    (UINT64_C(1) << 22)
#define SD_MAX_VDI_SIZE     (SD_DATA_OBJ_SIZE * MAX_DATA_OBJS)
#define SECTOR_SIZE         512

/* Size in bytes of the vdi (inode) object. */
#define SD_INODE_SIZE       (sizeof(SheepdogInode))
/* Snapshot id that selects the current (non-snapshot) vdi. */
#define CURRENT_VDI_ID      0
86:
/*
 * Generic request header.
 *
 * The leading fields up to `data_length' are repeated in every other
 * request/response header of this file; the trailing words are laid
 * out differently by each of those variants.
 */
typedef struct SheepdogReq {
    uint8_t     proto_ver;          /* SD_PROTO_VER */
    uint8_t     opcode;             /* one of SD_OP_* */
    uint16_t    flags;              /* SD_FLAG_CMD_* bits */
    uint32_t    epoch;
    uint32_t    id;                 /* request id */
    uint32_t    data_length;        /* length of the data that follows */
    uint32_t    opcode_specific[8];
} SheepdogReq;
96:
/*
 * Generic response header: same leading fields as SheepdogReq,
 * followed by the result code of the operation.
 */
typedef struct SheepdogRsp {
    uint8_t     proto_ver;
    uint8_t     opcode;
    uint16_t    flags;
    uint32_t    epoch;
    uint32_t    id;                 /* id of the request being answered */
    uint32_t    data_length;        /* length of the data that follows */
    uint32_t    result;             /* one of SD_RES_* */
    uint32_t    opcode_specific[7];
} SheepdogRsp;
107:
/* Request header of the object operations (SD_OP_*_OBJ). */
typedef struct SheepdogObjReq {
    uint8_t     proto_ver;
    uint8_t     opcode;
    uint16_t    flags;
    uint32_t    epoch;
    uint32_t    id;
    uint32_t    data_length;
    uint64_t    oid;                /* object the request operates on */
    uint64_t    cow_oid;            /* base object of a copy-on-write request */
    uint32_t    copies;
    uint32_t    rsvd;
    uint64_t    offset;             /* byte offset inside the object */
} SheepdogObjReq;
121:
/* Response header of the object operations. */
typedef struct SheepdogObjRsp {
    uint8_t     proto_ver;
    uint8_t     opcode;
    uint16_t    flags;
    uint32_t    epoch;
    uint32_t    id;
    uint32_t    data_length;
    uint32_t    result;             /* one of SD_RES_* */
    uint32_t    copies;
    uint32_t    pad[6];
} SheepdogObjRsp;
133:
/* Request header of the vdi operations (SD_OP_NEW_VDI, SD_OP_LOCK_VDI, ...). */
typedef struct SheepdogVdiReq {
    uint8_t     proto_ver;
    uint8_t     opcode;
    uint16_t    flags;
    uint32_t    epoch;
    uint32_t    id;
    uint32_t    data_length;
    uint64_t    vdi_size;
    uint32_t    base_vdi_id;
    uint32_t    copies;
    uint32_t    snapid;
    uint32_t    pad[3];
} SheepdogVdiReq;
147:
/* Response header of the vdi operations. */
typedef struct SheepdogVdiRsp {
    uint8_t     proto_ver;
    uint8_t     opcode;
    uint16_t    flags;
    uint32_t    epoch;
    uint32_t    id;
    uint32_t    data_length;
    uint32_t    result;             /* one of SD_RES_* */
    uint32_t    rsvd;
    uint32_t    vdi_id;             /* id of the vdi the request resolved to */
    uint32_t    pad[5];
} SheepdogVdiRsp;
160:
161: typedef struct SheepdogInode {
162: char name[SD_MAX_VDI_LEN];
163: char tag[SD_MAX_VDI_TAG_LEN];
164: uint64_t ctime;
165: uint64_t snap_ctime;
166: uint64_t vm_clock_nsec;
167: uint64_t vdi_size;
168: uint64_t vm_state_size;
169: uint16_t copy_policy;
170: uint8_t nr_copies;
171: uint8_t block_size_shift;
172: uint32_t snap_id;
173: uint32_t vdi_id;
174: uint32_t parent_vdi_id;
175: uint32_t child_vdi_id[MAX_CHILDREN];
176: uint32_t data_vdi_id[MAX_DATA_OBJS];
177: } SheepdogInode;
178:
/*
 * 64 bit FNV-1a non-zero initial basis
 */
#define FNV1A_64_INIT ((uint64_t)0xcbf29ce484222325ULL)

/*
 * 64 bit Fowler/Noll/Vo FNV-1a hash code
 *
 * Folds the `len' bytes at `buf' into the running hash `hval' and
 * returns the new hash value.  Pass FNV1A_64_INIT as `hval' to start a
 * fresh hash, or a previous result to continue one.
 */
static inline uint64_t fnv_64a_buf(void *buf, size_t len, uint64_t hval)
{
    /* 2^40 + 2^8 + 0xb3, i.e. 1 + the sum of shifts 1, 4, 5, 7, 8 and 40 */
    const uint64_t fnv_prime = UINT64_C(0x100000001b3);
    const unsigned char *bytes = buf;
    size_t i;

    for (i = 0; i < len; i++) {
        hval ^= (uint64_t)bytes[i];
        hval *= fnv_prime;
    }

    return hval;
}
198:
1.1.1.3 ! root 199: static inline int is_data_obj_writable(SheepdogInode *inode, unsigned int idx)
1.1 root 200: {
201: return inode->vdi_id == inode->data_vdi_id[idx];
202: }
203:
204: static inline int is_data_obj(uint64_t oid)
205: {
206: return !(VDI_BIT & oid);
207: }
208:
209: static inline uint64_t data_oid_to_idx(uint64_t oid)
210: {
211: return oid & (MAX_DATA_OBJS - 1);
212: }
213:
214: static inline uint64_t vid_to_vdi_oid(uint32_t vid)
215: {
216: return VDI_BIT | ((uint64_t)vid << VDI_SPACE_SHIFT);
217: }
218:
219: static inline uint64_t vid_to_vmstate_oid(uint32_t vid, uint32_t idx)
220: {
221: return VMSTATE_BIT | ((uint64_t)vid << VDI_SPACE_SHIFT) | idx;
222: }
223:
224: static inline uint64_t vid_to_data_oid(uint32_t vid, uint32_t idx)
225: {
226: return ((uint64_t)vid << VDI_SPACE_SHIFT) | idx;
227: }
228:
229: static inline int is_snapshot(struct SheepdogInode *inode)
230: {
231: return !!inode->snap_ctime;
232: }
233:
/*
 * Debug trace helper.  With DEBUG_SDOG defined it prints the message to
 * stdout prefixed with the calling function and line number; otherwise
 * it expands to nothing and its arguments are not evaluated.
 * Any earlier macro definition of dprintf is dropped first.
 */
#undef dprintf
#ifdef DEBUG_SDOG
#define dprintf(fmt, ...)                                                   \
    do {                                                                    \
        fprintf(stdout, "%s %d: " fmt, __func__, __LINE__, ##__VA_ARGS__);  \
    } while (0)
#else
#define dprintf(fmt, ...)
#endif
243:
244: typedef struct SheepdogAIOCB SheepdogAIOCB;
245:
246: typedef struct AIOReq {
247: SheepdogAIOCB *aiocb;
248: unsigned int iov_offset;
249:
250: uint64_t oid;
251: uint64_t base_oid;
252: uint64_t offset;
253: unsigned int data_len;
254: uint8_t flags;
255: uint32_t id;
256:
257: QLIST_ENTRY(AIOReq) outstanding_aio_siblings;
258: QLIST_ENTRY(AIOReq) aioreq_siblings;
259: } AIOReq;
260:
/* Kind of operation carried out by a SheepdogAIOCB. */
enum AIOCBState {
    AIOCB_WRITE_UDATA,  /* write of user data */
    AIOCB_READ_UDATA,   /* read of user data */
};
265:
266: struct SheepdogAIOCB {
267: BlockDriverAIOCB common;
268:
269: QEMUIOVector *qiov;
270:
271: int64_t sector_num;
272: int nb_sectors;
273:
274: int ret;
275: enum AIOCBState aiocb_type;
276:
277: QEMUBH *bh;
278: void (*aio_done_func)(SheepdogAIOCB *);
279:
280: int canceled;
281:
282: QLIST_HEAD(aioreq_head, AIOReq) aioreq_head;
283: };
284:
285: typedef struct BDRVSheepdogState {
286: SheepdogInode inode;
287:
288: uint32_t min_dirty_data_idx;
289: uint32_t max_dirty_data_idx;
290:
291: char name[SD_MAX_VDI_LEN];
292: int is_snapshot;
293:
294: char *addr;
295: char *port;
296: int fd;
297:
298: uint32_t aioreq_seq_num;
299: QLIST_HEAD(outstanding_aio_head, AIOReq) outstanding_aio_head;
300: } BDRVSheepdogState;
301:
302: static const char * sd_strerror(int err)
303: {
304: int i;
305:
306: static const struct {
307: int err;
308: const char *desc;
309: } errors[] = {
310: {SD_RES_SUCCESS, "Success"},
311: {SD_RES_UNKNOWN, "Unknown error"},
312: {SD_RES_NO_OBJ, "No object found"},
313: {SD_RES_EIO, "I/O error"},
314: {SD_RES_VDI_EXIST, "VDI exists already"},
315: {SD_RES_INVALID_PARMS, "Invalid parameters"},
316: {SD_RES_SYSTEM_ERROR, "System error"},
317: {SD_RES_VDI_LOCKED, "VDI is already locked"},
318: {SD_RES_NO_VDI, "No vdi found"},
319: {SD_RES_NO_BASE_VDI, "No base VDI found"},
320: {SD_RES_VDI_READ, "Failed read the requested VDI"},
321: {SD_RES_VDI_WRITE, "Failed to write the requested VDI"},
322: {SD_RES_BASE_VDI_READ, "Failed to read the base VDI"},
323: {SD_RES_BASE_VDI_WRITE, "Failed to write the base VDI"},
324: {SD_RES_NO_TAG, "Failed to find the requested tag"},
325: {SD_RES_STARTUP, "The system is still booting"},
326: {SD_RES_VDI_NOT_LOCKED, "VDI isn't locked"},
327: {SD_RES_SHUTDOWN, "The system is shutting down"},
328: {SD_RES_NO_MEM, "Out of memory on the server"},
329: {SD_RES_FULL_VDI, "We already have the maximum vdis"},
330: {SD_RES_VER_MISMATCH, "Protocol version mismatch"},
331: {SD_RES_NO_SPACE, "Server has no space for new objects"},
332: {SD_RES_WAIT_FOR_FORMAT, "Sheepdog is waiting for a format operation"},
333: {SD_RES_WAIT_FOR_JOIN, "Sheepdog is waiting for other nodes joining"},
334: {SD_RES_JOIN_FAILED, "Target node had failed to join sheepdog"},
335: };
336:
337: for (i = 0; i < ARRAY_SIZE(errors); ++i) {
338: if (errors[i].err == err) {
339: return errors[i].desc;
340: }
341: }
342:
343: return "Invalid error code";
344: }
345:
/*
 * Sheepdog I/O handling:
 *
 * 1. In sd_aio_readv/writev, read/write requests are added to the
 *    QEMU Bottom Halves.
 *
 * 2. In sd_readv_writev_bh_cb, the callback of the BHs, we send the
 *    I/O requests to the server and link the requests to the
 *    outstanding list in the BDRVSheepdogState.  We exit the
 *    function without waiting for the response.
 *
 * 3. We receive the response in aio_read_response, the fd handler of
 *    the sheepdog connection.  If a metadata update is needed, we send
 *    the write request to the vdi object in sd_write_done, the write
 *    completion function.  The AIOCB callback is not called until all
 *    the requests belonging to the AIOCB are finished.
 */
363:
364: static inline AIOReq *alloc_aio_req(BDRVSheepdogState *s, SheepdogAIOCB *acb,
365: uint64_t oid, unsigned int data_len,
366: uint64_t offset, uint8_t flags,
367: uint64_t base_oid, unsigned int iov_offset)
368: {
369: AIOReq *aio_req;
370:
371: aio_req = qemu_malloc(sizeof(*aio_req));
372: aio_req->aiocb = acb;
373: aio_req->iov_offset = iov_offset;
374: aio_req->oid = oid;
375: aio_req->base_oid = base_oid;
376: aio_req->offset = offset;
377: aio_req->data_len = data_len;
378: aio_req->flags = flags;
379: aio_req->id = s->aioreq_seq_num++;
380:
381: QLIST_INSERT_HEAD(&s->outstanding_aio_head, aio_req,
382: outstanding_aio_siblings);
383: QLIST_INSERT_HEAD(&acb->aioreq_head, aio_req, aioreq_siblings);
384:
385: return aio_req;
386: }
387:
388: static inline int free_aio_req(BDRVSheepdogState *s, AIOReq *aio_req)
389: {
390: SheepdogAIOCB *acb = aio_req->aiocb;
391: QLIST_REMOVE(aio_req, outstanding_aio_siblings);
392: QLIST_REMOVE(aio_req, aioreq_siblings);
393: qemu_free(aio_req);
394:
395: return !QLIST_EMPTY(&acb->aioreq_head);
396: }
397:
398: static void sd_finish_aiocb(SheepdogAIOCB *acb)
399: {
400: if (!acb->canceled) {
401: acb->common.cb(acb->common.opaque, acb->ret);
402: }
403: qemu_aio_release(acb);
404: }
405:
406: static void sd_aio_cancel(BlockDriverAIOCB *blockacb)
407: {
408: SheepdogAIOCB *acb = (SheepdogAIOCB *)blockacb;
409:
410: /*
411: * Sheepdog cannot cancel the requests which are already sent to
412: * the servers, so we just complete the request with -EIO here.
413: */
414: acb->common.cb(acb->common.opaque, -EIO);
415: acb->canceled = 1;
416: }
417:
418: static AIOPool sd_aio_pool = {
419: .aiocb_size = sizeof(SheepdogAIOCB),
420: .cancel = sd_aio_cancel,
421: };
422:
423: static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov,
424: int64_t sector_num, int nb_sectors,
425: BlockDriverCompletionFunc *cb, void *opaque)
426: {
427: SheepdogAIOCB *acb;
428:
429: acb = qemu_aio_get(&sd_aio_pool, bs, cb, opaque);
430:
431: acb->qiov = qiov;
432:
433: acb->sector_num = sector_num;
434: acb->nb_sectors = nb_sectors;
435:
436: acb->aio_done_func = NULL;
437: acb->canceled = 0;
438: acb->bh = NULL;
439: acb->ret = 0;
440: QLIST_INIT(&acb->aioreq_head);
441: return acb;
442: }
443:
444: static int sd_schedule_bh(QEMUBHFunc *cb, SheepdogAIOCB *acb)
445: {
446: if (acb->bh) {
1.1.1.3 ! root 447: error_report("bug: %d %d", acb->aiocb_type, acb->aiocb_type);
1.1 root 448: return -EIO;
449: }
450:
451: acb->bh = qemu_bh_new(cb, acb);
452: qemu_bh_schedule(acb->bh);
453: return 0;
454: }
455:
456: #ifdef _WIN32
457:
/*
 * Minimal stand-in for the POSIX msghdr, holding only the two members
 * that the sendmsg()/recvmsg() replacements below look at.
 */
struct msghdr {
    struct iovec *msg_iov;      /* array of buffers */
    size_t msg_iovlen;          /* number of elements in msg_iov */
};
462:
463: static ssize_t sendmsg(int s, const struct msghdr *msg, int flags)
464: {
465: size_t size = 0;
466: char *buf, *p;
467: int i, ret;
468:
469: /* count the msg size */
470: for (i = 0; i < msg->msg_iovlen; i++) {
471: size += msg->msg_iov[i].iov_len;
472: }
473: buf = qemu_malloc(size);
474:
475: p = buf;
476: for (i = 0; i < msg->msg_iovlen; i++) {
477: memcpy(p, msg->msg_iov[i].iov_base, msg->msg_iov[i].iov_len);
478: p += msg->msg_iov[i].iov_len;
479: }
480:
481: ret = send(s, buf, size, flags);
482:
483: qemu_free(buf);
484: return ret;
485: }
486:
487: static ssize_t recvmsg(int s, struct msghdr *msg, int flags)
488: {
489: size_t size = 0;
490: char *buf, *p;
491: int i, ret;
492:
493: /* count the msg size */
494: for (i = 0; i < msg->msg_iovlen; i++) {
495: size += msg->msg_iov[i].iov_len;
496: }
497: buf = qemu_malloc(size);
498:
1.1.1.3 ! root 499: ret = qemu_recv(s, buf, size, flags);
1.1 root 500: if (ret < 0) {
501: goto out;
502: }
503:
504: p = buf;
505: for (i = 0; i < msg->msg_iovlen; i++) {
506: memcpy(msg->msg_iov[i].iov_base, p, msg->msg_iov[i].iov_len);
507: p += msg->msg_iov[i].iov_len;
508: }
509: out:
510: qemu_free(buf);
511: return ret;
512: }
513:
514: #endif
515:
/*
 * Send/recv data with iovec buffers
 *
 * This function sends/receives data from/to the iovec buffers directly.
 * The first `offset' bytes of the iovec buffers are skipped and the
 * next `len' bytes are used.
 *
 * For example,
 *
 *   do_send_recv(sockfd, iov, len, offset, 1);
 *
 * is equal to
 *
 *   char *buf = malloc(size);
 *   iov_to_buf(iov, iovcnt, buf, offset, size);
 *   send(sockfd, buf, size, 0);
 *   free(buf);
 *
 * The iovec array is modified while the call is in flight and put back
 * into its original state before returning.  Returns the result of
 * sendmsg()/recvmsg(), which may be less than `len'.
 */
static int do_send_recv(int sockfd, struct iovec *iov, int len, int offset,
                        int write)
{
    struct msghdr msg;
    int ret;
    int tail_trim;

    memset(&msg, 0, sizeof(msg));
    msg.msg_iov = iov;
    msg.msg_iovlen = 1;

    /* from here on `len' is the end position of the window */
    len += offset;

    /* advance `iov' to the element that holds the end of the window */
    for (; iov->iov_len < len; iov++) {
        len -= iov->iov_len;
        msg.msg_iovlen++;
    }

    /* cut off whatever lies behind the window in that element */
    tail_trim = iov->iov_len - len;
    iov->iov_len -= tail_trim;

    /* drop the leading elements that lie completely before the window */
    for (; msg.msg_iov->iov_len <= offset; msg.msg_iov++) {
        offset -= msg.msg_iov->iov_len;
        msg.msg_iovlen--;
    }

    /* skip the remaining `offset' bytes inside the first element */
    msg.msg_iov->iov_base = (char *) msg.msg_iov->iov_base + offset;
    msg.msg_iov->iov_len -= offset;

    ret = write ? sendmsg(sockfd, &msg, 0) : recvmsg(sockfd, &msg, 0);

    /* undo both adjustments so the caller's array is unchanged */
    msg.msg_iov->iov_base = (char *) msg.msg_iov->iov_base - offset;
    msg.msg_iov->iov_len += offset;
    iov->iov_len += tail_trim;

    return ret;
}
578:
/*
 * Open a TCP connection to the Sheepdog server at `addr':`port'.
 * When `addr' is NULL, SD_DEFAULT_ADDR and SD_DEFAULT_PORT are used.
 *
 * Every address returned by getaddrinfo() is tried in turn until one
 * connects.  A socket whose connect() fails is closed before the next
 * address is tried, so no descriptor is leaked.
 *
 * Returns the connected socket, or -1 on failure.  On a connect failure
 * errno is that of the last connect() attempt.
 */
static int connect_to_sdog(const char *addr, const char *port)
{
    char hbuf[NI_MAXHOST], sbuf[NI_MAXSERV];
    int fd, ret;
    struct addrinfo hints, *res, *res0;

    if (!addr) {
        addr = SD_DEFAULT_ADDR;
        port = SD_DEFAULT_PORT;
    }

    memset(&hints, 0, sizeof(hints));
    hints.ai_socktype = SOCK_STREAM;

    ret = getaddrinfo(addr, port, &hints, &res0);
    if (ret) {
        /* getaddrinfo() reports through its return value, not errno */
        error_report("unable to get address info %s, %s",
                     addr, gai_strerror(ret));
        return -1;
    }

    for (res = res0; res; res = res->ai_next) {
        ret = getnameinfo(res->ai_addr, res->ai_addrlen, hbuf, sizeof(hbuf),
                          sbuf, sizeof(sbuf), NI_NUMERICHOST | NI_NUMERICSERV);
        if (ret) {
            continue;
        }

        fd = socket(res->ai_family, res->ai_socktype, res->ai_protocol);
        if (fd < 0) {
            continue;
        }

        /* retry when interrupted by a signal */
        do {
            ret = connect(fd, res->ai_addr, res->ai_addrlen);
        } while (ret < 0 && errno == EINTR);

        if (ret < 0) {
            /* keep the errno of connect() for the caller's message */
            int saved_errno = errno;

            closesocket(fd);
            errno = saved_errno;
            continue;
        }

        dprintf("connected to %s:%s\n", addr, port);
        goto success;
    }
    fd = -1;
    error_report("failed connect to %s:%s", addr, port);
success:
    freeaddrinfo(res0);
    return fd;
}
630:
/*
 * Transfer exactly `len' bytes between the socket and the iovec
 * buffers, starting `iov_offset' bytes into the buffers.  `write'
 * selects the direction (non-zero: send, zero: receive).
 *
 * Short transfers are continued, and EINTR/EAGAIN are retried, until
 * everything has been moved.  A non-positive `len' is a no-op, so
 * do_send_recv() is never called with an empty window, which would
 * make it step past the iovec array.  A transfer of 0 bytes while data
 * is still pending means the peer closed the connection; it is
 * reported as an error (errno is set to ECONNRESET) instead of being
 * retried forever.
 *
 * Returns 0 on success and 1 on failure.
 */
static int do_readv_writev(int sockfd, struct iovec *iov, int len,
                           int iov_offset, int write)
{
    int ret;

    while (len > 0) {
        ret = do_send_recv(sockfd, iov, len, iov_offset, write);
        if (ret < 0) {
            if (errno == EINTR || errno == EAGAIN) {
                continue;
            }
            error_report("failed to recv a rsp, %s", strerror(errno));
            return 1;
        }
        if (ret == 0) {
            /* nothing moved although `len' bytes are outstanding */
            errno = ECONNRESET;
            error_report("failed to recv a rsp, %s", strerror(errno));
            return 1;
        }

        iov_offset += ret;
        len -= ret;
    }

    return 0;
}
653:
/*
 * Receive `len' bytes into the iovec buffers, starting `iov_offset'
 * bytes into them.  Returns 0 on success and non-zero on failure.
 */
static int do_readv(int sockfd, struct iovec *iov, int len, int iov_offset)
{
    const int is_write = 0;

    return do_readv_writev(sockfd, iov, len, iov_offset, is_write);
}
658:
/*
 * Send `len' bytes from the iovec buffers, starting `iov_offset'
 * bytes into them.  Returns 0 on success and non-zero on failure.
 */
static int do_writev(int sockfd, struct iovec *iov, int len, int iov_offset)
{
    const int is_write = 1;

    return do_readv_writev(sockfd, iov, len, iov_offset, is_write);
}
663:
/*
 * Transfer `len' bytes between the socket and the flat buffer `buf';
 * `write' selects the direction.  Returns 0 on success and non-zero
 * on failure.
 */
static int do_read_write(int sockfd, void *buf, int len, int write)
{
    /* wrap the flat buffer into a one-element vector */
    struct iovec single = {
        .iov_base = buf,
        .iov_len = len,
    };

    return do_readv_writev(sockfd, &single, len, 0, write);
}
673:
/*
 * Receive exactly `len' bytes into `buf'.  Returns 0 on success and
 * non-zero on failure.
 */
static int do_read(int sockfd, void *buf, int len)
{
    const int is_write = 0;

    return do_read_write(sockfd, buf, len, is_write);
}
678:
/*
 * Send exactly `len' bytes from `buf'.  Returns 0 on success and
 * non-zero on failure.
 */
static int do_write(int sockfd, void *buf, int len)
{
    const int is_write = 1;

    return do_read_write(sockfd, buf, len, is_write);
}
683:
684: static int send_req(int sockfd, SheepdogReq *hdr, void *data,
685: unsigned int *wlen)
686: {
687: int ret;
688: struct iovec iov[2];
689:
690: iov[0].iov_base = hdr;
691: iov[0].iov_len = sizeof(*hdr);
692:
693: if (*wlen) {
694: iov[1].iov_base = data;
695: iov[1].iov_len = *wlen;
696: }
697:
698: ret = do_writev(sockfd, iov, sizeof(*hdr) + *wlen, 0);
699: if (ret) {
1.1.1.3 ! root 700: error_report("failed to send a req, %s", strerror(errno));
1.1 root 701: ret = -1;
702: }
703:
704: return ret;
705: }
706:
707: static int do_req(int sockfd, SheepdogReq *hdr, void *data,
708: unsigned int *wlen, unsigned int *rlen)
709: {
710: int ret;
711:
712: ret = send_req(sockfd, hdr, data, wlen);
713: if (ret) {
714: ret = -1;
715: goto out;
716: }
717:
718: ret = do_read(sockfd, hdr, sizeof(*hdr));
719: if (ret) {
1.1.1.3 ! root 720: error_report("failed to get a rsp, %s", strerror(errno));
1.1 root 721: ret = -1;
722: goto out;
723: }
724:
725: if (*rlen > hdr->data_length) {
726: *rlen = hdr->data_length;
727: }
728:
729: if (*rlen) {
730: ret = do_read(sockfd, data, *rlen);
731: if (ret) {
1.1.1.3 ! root 732: error_report("failed to get the data, %s", strerror(errno));
1.1 root 733: ret = -1;
734: goto out;
735: }
736: }
737: ret = 0;
738: out:
739: return ret;
740: }
741:
742: static int add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
743: struct iovec *iov, int niov, int create,
744: enum AIOCBState aiocb_type);
745:
746: /*
747: * This function searchs pending requests to the object `oid', and
748: * sends them.
749: */
750: static void send_pending_req(BDRVSheepdogState *s, uint64_t oid, uint32_t id)
751: {
752: AIOReq *aio_req, *next;
753: SheepdogAIOCB *acb;
754: int ret;
755:
756: QLIST_FOREACH_SAFE(aio_req, &s->outstanding_aio_head,
757: outstanding_aio_siblings, next) {
758: if (id == aio_req->id) {
759: continue;
760: }
761: if (aio_req->oid != oid) {
762: continue;
763: }
764:
765: acb = aio_req->aiocb;
766: ret = add_aio_request(s, aio_req, acb->qiov->iov,
767: acb->qiov->niov, 0, acb->aiocb_type);
768: if (ret < 0) {
1.1.1.3 ! root 769: error_report("add_aio_request is failed");
1.1 root 770: free_aio_req(s, aio_req);
771: if (QLIST_EMPTY(&acb->aioreq_head)) {
772: sd_finish_aiocb(acb);
773: }
774: }
775: }
776: }
777:
778: /*
779: * Receive responses of the I/O requests.
780: *
781: * This function is registered as a fd handler, and called from the
782: * main loop when s->fd is ready for reading responses.
783: */
784: static void aio_read_response(void *opaque)
785: {
786: SheepdogObjRsp rsp;
787: BDRVSheepdogState *s = opaque;
788: int fd = s->fd;
789: int ret;
790: AIOReq *aio_req = NULL;
791: SheepdogAIOCB *acb;
792: int rest;
793: unsigned long idx;
794:
795: if (QLIST_EMPTY(&s->outstanding_aio_head)) {
796: return;
797: }
798:
799: /* read a header */
800: ret = do_read(fd, &rsp, sizeof(rsp));
801: if (ret) {
1.1.1.3 ! root 802: error_report("failed to get the header, %s", strerror(errno));
1.1 root 803: return;
804: }
805:
806: /* find the right aio_req from the outstanding_aio list */
807: QLIST_FOREACH(aio_req, &s->outstanding_aio_head, outstanding_aio_siblings) {
808: if (aio_req->id == rsp.id) {
809: break;
810: }
811: }
812: if (!aio_req) {
1.1.1.3 ! root 813: error_report("cannot find aio_req %x", rsp.id);
1.1 root 814: return;
815: }
816:
817: acb = aio_req->aiocb;
818:
819: switch (acb->aiocb_type) {
820: case AIOCB_WRITE_UDATA:
821: if (!is_data_obj(aio_req->oid)) {
822: break;
823: }
824: idx = data_oid_to_idx(aio_req->oid);
825:
826: if (s->inode.data_vdi_id[idx] != s->inode.vdi_id) {
827: /*
828: * If the object is newly created one, we need to update
829: * the vdi object (metadata object). min_dirty_data_idx
830: * and max_dirty_data_idx are changed to include updated
831: * index between them.
832: */
833: s->inode.data_vdi_id[idx] = s->inode.vdi_id;
834: s->max_dirty_data_idx = MAX(idx, s->max_dirty_data_idx);
835: s->min_dirty_data_idx = MIN(idx, s->min_dirty_data_idx);
836:
837: /*
838: * Some requests may be blocked because simultaneous
839: * create requests are not allowed, so we search the
840: * pending requests here.
841: */
842: send_pending_req(s, vid_to_data_oid(s->inode.vdi_id, idx), rsp.id);
843: }
844: break;
845: case AIOCB_READ_UDATA:
846: ret = do_readv(fd, acb->qiov->iov, rsp.data_length,
847: aio_req->iov_offset);
848: if (ret) {
1.1.1.3 ! root 849: error_report("failed to get the data, %s", strerror(errno));
1.1 root 850: return;
851: }
852: break;
853: }
854:
855: if (rsp.result != SD_RES_SUCCESS) {
856: acb->ret = -EIO;
1.1.1.3 ! root 857: error_report("%s", sd_strerror(rsp.result));
1.1 root 858: }
859:
860: rest = free_aio_req(s, aio_req);
861: if (!rest) {
862: /*
863: * We've finished all requests which belong to the AIOCB, so
864: * we can call the callback now.
865: */
866: acb->aio_done_func(acb);
867: }
868: }
869:
870: static int aio_flush_request(void *opaque)
871: {
872: BDRVSheepdogState *s = opaque;
873:
874: return !QLIST_EMPTY(&s->outstanding_aio_head);
875: }
876:
/*
 * Set the TCP_CORK socket option of `fd' to `v'.
 *
 * Where SOL_TCP or TCP_CORK is not available this does nothing and
 * reports success.  Otherwise the result of setsockopt() is returned.
 */
static int set_cork(int fd, int v)
{
#if defined(SOL_TCP) && defined(TCP_CORK)
    return setsockopt(fd, SOL_TCP, TCP_CORK, &v, sizeof(v));
#else
    return 0;
#endif
}
892:
/*
 * Enable the TCP_NODELAY option on `fd'.
 *
 * Returns the result of setsockopt().
 */
static int set_nodelay(int fd)
{
    int enable = 1;

    return setsockopt(fd, IPPROTO_TCP, TCP_NODELAY,
                      (char *)&enable, sizeof(enable));
}
901:
902: /*
903: * Return a socket discriptor to read/write objects.
904: *
905: * We cannot use this discriptor for other operations because
906: * the block driver may be on waiting response from the server.
907: */
908: static int get_sheep_fd(BDRVSheepdogState *s)
909: {
910: int ret, fd;
911:
912: fd = connect_to_sdog(s->addr, s->port);
913: if (fd < 0) {
1.1.1.3 ! root 914: error_report("%s", strerror(errno));
1.1 root 915: return -1;
916: }
917:
918: socket_set_nonblock(fd);
919:
920: ret = set_nodelay(fd);
921: if (ret) {
1.1.1.3 ! root 922: error_report("%s", strerror(errno));
1.1 root 923: closesocket(fd);
924: return -1;
925: }
926:
927: qemu_aio_set_fd_handler(fd, aio_read_response, NULL, aio_flush_request,
928: NULL, s);
929: return fd;
930: }
931:
932: /*
933: * Parse a filename
934: *
935: * filename must be one of the following formats:
936: * 1. [vdiname]
937: * 2. [vdiname]:[snapid]
938: * 3. [vdiname]:[tag]
939: * 4. [hostname]:[port]:[vdiname]
940: * 5. [hostname]:[port]:[vdiname]:[snapid]
941: * 6. [hostname]:[port]:[vdiname]:[tag]
942: *
943: * You can boot from the snapshot images by specifying `snapid` or
944: * `tag'.
945: *
946: * You can run VMs outside the Sheepdog cluster by specifying
947: * `hostname' and `port' (experimental).
948: */
949: static int parse_vdiname(BDRVSheepdogState *s, const char *filename,
950: char *vdi, uint32_t *snapid, char *tag)
951: {
952: char *p, *q;
953: int nr_sep;
954:
955: p = q = qemu_strdup(filename);
956:
957: /* count the number of separators */
958: nr_sep = 0;
959: while (*p) {
960: if (*p == ':') {
961: nr_sep++;
962: }
963: p++;
964: }
965: p = q;
966:
967: /* use the first two tokens as hostname and port number. */
968: if (nr_sep >= 2) {
969: s->addr = p;
970: p = strchr(p, ':');
971: *p++ = '\0';
972:
973: s->port = p;
974: p = strchr(p, ':');
975: *p++ = '\0';
976: } else {
977: s->addr = NULL;
978: s->port = 0;
979: }
980:
981: strncpy(vdi, p, SD_MAX_VDI_LEN);
982:
983: p = strchr(vdi, ':');
984: if (p) {
985: *p++ = '\0';
986: *snapid = strtoul(p, NULL, 10);
987: if (*snapid == 0) {
988: strncpy(tag, p, SD_MAX_VDI_TAG_LEN);
989: }
990: } else {
991: *snapid = CURRENT_VDI_ID; /* search current vdi */
992: }
993:
994: if (s->addr == NULL) {
995: qemu_free(q);
996: }
997:
998: return 0;
999: }
1000:
1001: static int find_vdi_name(BDRVSheepdogState *s, char *filename, uint32_t snapid,
1002: char *tag, uint32_t *vid, int for_snapshot)
1003: {
1004: int ret, fd;
1005: SheepdogVdiReq hdr;
1006: SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr;
1007: unsigned int wlen, rlen = 0;
1008: char buf[SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN];
1009:
1010: fd = connect_to_sdog(s->addr, s->port);
1011: if (fd < 0) {
1012: return -1;
1013: }
1014:
1015: memset(buf, 0, sizeof(buf));
1016: strncpy(buf, filename, SD_MAX_VDI_LEN);
1017: strncpy(buf + SD_MAX_VDI_LEN, tag, SD_MAX_VDI_TAG_LEN);
1018:
1019: memset(&hdr, 0, sizeof(hdr));
1020: if (for_snapshot) {
1021: hdr.opcode = SD_OP_GET_VDI_INFO;
1022: } else {
1023: hdr.opcode = SD_OP_LOCK_VDI;
1024: }
1025: wlen = SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN;
1026: hdr.proto_ver = SD_PROTO_VER;
1027: hdr.data_length = wlen;
1028: hdr.snapid = snapid;
1029: hdr.flags = SD_FLAG_CMD_WRITE;
1030:
1031: ret = do_req(fd, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
1032: if (ret) {
1033: ret = -1;
1034: goto out;
1035: }
1036:
1037: if (rsp->result != SD_RES_SUCCESS) {
1.1.1.3 ! root 1038: error_report("cannot get vdi info, %s, %s %d %s",
1.1 root 1039: sd_strerror(rsp->result), filename, snapid, tag);
1040: ret = -1;
1041: goto out;
1042: }
1043: *vid = rsp->vdi_id;
1044:
1045: ret = 0;
1046: out:
1047: closesocket(fd);
1048: return ret;
1049: }
1050:
1051: static int add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
1052: struct iovec *iov, int niov, int create,
1053: enum AIOCBState aiocb_type)
1054: {
1055: int nr_copies = s->inode.nr_copies;
1056: SheepdogObjReq hdr;
1057: unsigned int wlen;
1058: int ret;
1059: uint64_t oid = aio_req->oid;
1060: unsigned int datalen = aio_req->data_len;
1061: uint64_t offset = aio_req->offset;
1062: uint8_t flags = aio_req->flags;
1063: uint64_t old_oid = aio_req->base_oid;
1064:
1065: if (!nr_copies) {
1.1.1.3 ! root 1066: error_report("bug");
1.1 root 1067: }
1068:
1069: memset(&hdr, 0, sizeof(hdr));
1070:
1071: if (aiocb_type == AIOCB_READ_UDATA) {
1072: wlen = 0;
1073: hdr.opcode = SD_OP_READ_OBJ;
1074: hdr.flags = flags;
1075: } else if (create) {
1076: wlen = datalen;
1077: hdr.opcode = SD_OP_CREATE_AND_WRITE_OBJ;
1078: hdr.flags = SD_FLAG_CMD_WRITE | flags;
1079: } else {
1080: wlen = datalen;
1081: hdr.opcode = SD_OP_WRITE_OBJ;
1082: hdr.flags = SD_FLAG_CMD_WRITE | flags;
1083: }
1084:
1085: hdr.oid = oid;
1086: hdr.cow_oid = old_oid;
1087: hdr.copies = s->inode.nr_copies;
1088:
1089: hdr.data_length = datalen;
1090: hdr.offset = offset;
1091:
1092: hdr.id = aio_req->id;
1093:
1094: set_cork(s->fd, 1);
1095:
1096: /* send a header */
1097: ret = do_write(s->fd, &hdr, sizeof(hdr));
1098: if (ret) {
1.1.1.3 ! root 1099: error_report("failed to send a req, %s", strerror(errno));
1.1 root 1100: return -EIO;
1101: }
1102:
1103: if (wlen) {
1104: ret = do_writev(s->fd, iov, wlen, aio_req->iov_offset);
1105: if (ret) {
1.1.1.3 ! root 1106: error_report("failed to send a data, %s", strerror(errno));
1.1 root 1107: return -EIO;
1108: }
1109: }
1110:
1111: set_cork(s->fd, 0);
1112:
1113: return 0;
1114: }
1115:
1116: static int read_write_object(int fd, char *buf, uint64_t oid, int copies,
1117: unsigned int datalen, uint64_t offset,
1118: int write, int create)
1119: {
1120: SheepdogObjReq hdr;
1121: SheepdogObjRsp *rsp = (SheepdogObjRsp *)&hdr;
1122: unsigned int wlen, rlen;
1123: int ret;
1124:
1125: memset(&hdr, 0, sizeof(hdr));
1126:
1127: if (write) {
1128: wlen = datalen;
1129: rlen = 0;
1130: hdr.flags = SD_FLAG_CMD_WRITE;
1131: if (create) {
1132: hdr.opcode = SD_OP_CREATE_AND_WRITE_OBJ;
1133: } else {
1134: hdr.opcode = SD_OP_WRITE_OBJ;
1135: }
1136: } else {
1137: wlen = 0;
1138: rlen = datalen;
1139: hdr.opcode = SD_OP_READ_OBJ;
1140: }
1141: hdr.oid = oid;
1142: hdr.data_length = datalen;
1143: hdr.offset = offset;
1144: hdr.copies = copies;
1145:
1146: ret = do_req(fd, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
1147: if (ret) {
1.1.1.3 ! root 1148: error_report("failed to send a request to the sheep");
1.1 root 1149: return -1;
1150: }
1151:
1152: switch (rsp->result) {
1153: case SD_RES_SUCCESS:
1154: return 0;
1155: default:
1.1.1.3 ! root 1156: error_report("%s", sd_strerror(rsp->result));
1.1 root 1157: return -1;
1158: }
1159: }
1160:
/*
 * Synchronously read `datalen' bytes of object `oid' at `offset' into
 * `buf'.  Returns 0 on success and -1 on failure.
 */
static int read_object(int fd, char *buf, uint64_t oid, int copies,
                       unsigned int datalen, uint64_t offset)
{
    const int is_write = 0;
    const int create = 0;

    return read_write_object(fd, buf, oid, copies, datalen, offset,
                             is_write, create);
}
1166:
/*
 * Synchronously write `datalen' bytes from `buf' to object `oid' at
 * `offset'; a non-zero `create' requests a create-and-write.
 * Returns 0 on success and -1 on failure.
 */
static int write_object(int fd, char *buf, uint64_t oid, int copies,
                        unsigned int datalen, uint64_t offset, int create)
{
    const int is_write = 1;

    return read_write_object(fd, buf, oid, copies, datalen, offset,
                             is_write, create);
}
1172:
1173: static int sd_open(BlockDriverState *bs, const char *filename, int flags)
1174: {
1175: int ret, fd;
1176: uint32_t vid = 0;
1177: BDRVSheepdogState *s = bs->opaque;
1178: char vdi[SD_MAX_VDI_LEN], tag[SD_MAX_VDI_TAG_LEN];
1179: uint32_t snapid;
1180: char *buf = NULL;
1181:
1182: strstart(filename, "sheepdog:", (const char **)&filename);
1183:
1184: QLIST_INIT(&s->outstanding_aio_head);
1185: s->fd = -1;
1186:
1187: memset(vdi, 0, sizeof(vdi));
1188: memset(tag, 0, sizeof(tag));
1189: if (parse_vdiname(s, filename, vdi, &snapid, tag) < 0) {
1190: goto out;
1191: }
1192: s->fd = get_sheep_fd(s);
1193: if (s->fd < 0) {
1194: goto out;
1195: }
1196:
1197: ret = find_vdi_name(s, vdi, snapid, tag, &vid, 0);
1198: if (ret) {
1199: goto out;
1200: }
1201:
1202: if (snapid) {
1203: dprintf("%" PRIx32 " snapshot inode was open.\n", vid);
1204: s->is_snapshot = 1;
1205: }
1206:
1207: fd = connect_to_sdog(s->addr, s->port);
1208: if (fd < 0) {
1.1.1.3 ! root 1209: error_report("failed to connect");
1.1 root 1210: goto out;
1211: }
1212:
1213: buf = qemu_malloc(SD_INODE_SIZE);
1214: ret = read_object(fd, buf, vid_to_vdi_oid(vid), 0, SD_INODE_SIZE, 0);
1215:
1216: closesocket(fd);
1217:
1218: if (ret) {
1219: goto out;
1220: }
1221:
1222: memcpy(&s->inode, buf, sizeof(s->inode));
1223: s->min_dirty_data_idx = UINT32_MAX;
1224: s->max_dirty_data_idx = 0;
1225:
1226: bs->total_sectors = s->inode.vdi_size / SECTOR_SIZE;
1227: strncpy(s->name, vdi, sizeof(s->name));
1228: qemu_free(buf);
1229: return 0;
1230: out:
1231: qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL, NULL, NULL);
1232: if (s->fd >= 0) {
1233: closesocket(s->fd);
1234: }
1235: qemu_free(buf);
1236: return -1;
1237: }
1238:
1239: static int do_sd_create(char *filename, int64_t vdi_size,
1240: uint32_t base_vid, uint32_t *vdi_id, int snapshot,
1241: const char *addr, const char *port)
1242: {
1243: SheepdogVdiReq hdr;
1244: SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr;
1245: int fd, ret;
1246: unsigned int wlen, rlen = 0;
1247: char buf[SD_MAX_VDI_LEN];
1248:
1249: fd = connect_to_sdog(addr, port);
1250: if (fd < 0) {
1251: return -EIO;
1252: }
1253:
1254: memset(buf, 0, sizeof(buf));
1255: strncpy(buf, filename, SD_MAX_VDI_LEN);
1256:
1257: memset(&hdr, 0, sizeof(hdr));
1258: hdr.opcode = SD_OP_NEW_VDI;
1259: hdr.base_vdi_id = base_vid;
1260:
1261: wlen = SD_MAX_VDI_LEN;
1262:
1263: hdr.flags = SD_FLAG_CMD_WRITE;
1264: hdr.snapid = snapshot;
1265:
1266: hdr.data_length = wlen;
1267: hdr.vdi_size = vdi_size;
1268:
1269: ret = do_req(fd, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
1270:
1271: closesocket(fd);
1272:
1273: if (ret) {
1274: return -EIO;
1275: }
1276:
1277: if (rsp->result != SD_RES_SUCCESS) {
1.1.1.3 ! root 1278: error_report("%s, %s", sd_strerror(rsp->result), filename);
1.1 root 1279: return -EIO;
1280: }
1281:
1282: if (vdi_id) {
1283: *vdi_id = rsp->vdi_id;
1284: }
1285:
1286: return 0;
1287: }
1288:
1.1.1.3 ! root 1289: static int sd_prealloc(const char *filename)
! 1290: {
! 1291: BlockDriverState *bs = NULL;
! 1292: uint32_t idx, max_idx;
! 1293: int64_t vdi_size;
! 1294: void *buf = qemu_mallocz(SD_DATA_OBJ_SIZE);
! 1295: int ret;
! 1296:
! 1297: ret = bdrv_file_open(&bs, filename, BDRV_O_RDWR);
! 1298: if (ret < 0) {
! 1299: goto out;
! 1300: }
! 1301:
! 1302: vdi_size = bdrv_getlength(bs);
! 1303: if (vdi_size < 0) {
! 1304: ret = vdi_size;
! 1305: goto out;
! 1306: }
! 1307: max_idx = DIV_ROUND_UP(vdi_size, SD_DATA_OBJ_SIZE);
! 1308:
! 1309: for (idx = 0; idx < max_idx; idx++) {
! 1310: /*
! 1311: * The created image can be a cloned image, so we need to read
! 1312: * a data from the source image.
! 1313: */
! 1314: ret = bdrv_pread(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE);
! 1315: if (ret < 0) {
! 1316: goto out;
! 1317: }
! 1318: ret = bdrv_pwrite(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE);
! 1319: if (ret < 0) {
! 1320: goto out;
! 1321: }
! 1322: }
! 1323: out:
! 1324: if (bs) {
! 1325: bdrv_delete(bs);
! 1326: }
! 1327: qemu_free(buf);
! 1328:
! 1329: return ret;
! 1330: }
! 1331:
1.1 root 1332: static int sd_create(const char *filename, QEMUOptionParameter *options)
1333: {
1334: int ret;
1.1.1.2 root 1335: uint32_t vid = 0, base_vid = 0;
1.1 root 1336: int64_t vdi_size = 0;
1337: char *backing_file = NULL;
1.1.1.2 root 1338: BDRVSheepdogState s;
1339: char vdi[SD_MAX_VDI_LEN], tag[SD_MAX_VDI_TAG_LEN];
1340: uint32_t snapid;
1.1.1.3 ! root 1341: int prealloc = 0;
! 1342: const char *vdiname;
1.1 root 1343:
1.1.1.3 ! root 1344: strstart(filename, "sheepdog:", &vdiname);
1.1 root 1345:
1.1.1.2 root 1346: memset(&s, 0, sizeof(s));
1347: memset(vdi, 0, sizeof(vdi));
1348: memset(tag, 0, sizeof(tag));
1.1.1.3 ! root 1349: if (parse_vdiname(&s, vdiname, vdi, &snapid, tag) < 0) {
! 1350: error_report("invalid filename");
1.1.1.2 root 1351: return -EINVAL;
1352: }
1353:
1.1 root 1354: while (options && options->name) {
1355: if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
1356: vdi_size = options->value.n;
1357: } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
1358: backing_file = options->value.s;
1.1.1.3 ! root 1359: } else if (!strcmp(options->name, BLOCK_OPT_PREALLOC)) {
! 1360: if (!options->value.s || !strcmp(options->value.s, "off")) {
! 1361: prealloc = 0;
! 1362: } else if (!strcmp(options->value.s, "full")) {
! 1363: prealloc = 1;
! 1364: } else {
! 1365: error_report("Invalid preallocation mode: '%s'",
! 1366: options->value.s);
! 1367: return -EINVAL;
! 1368: }
1.1 root 1369: }
1370: options++;
1371: }
1372:
1373: if (vdi_size > SD_MAX_VDI_SIZE) {
1.1.1.3 ! root 1374: error_report("too big image size");
1.1 root 1375: return -EINVAL;
1376: }
1377:
1378: if (backing_file) {
1379: BlockDriverState *bs;
1380: BDRVSheepdogState *s;
1381: BlockDriver *drv;
1382:
1383: /* Currently, only Sheepdog backing image is supported. */
1384: drv = bdrv_find_protocol(backing_file);
1385: if (!drv || strcmp(drv->protocol_name, "sheepdog") != 0) {
1.1.1.3 ! root 1386: error_report("backing_file must be a sheepdog image");
1.1 root 1387: return -EINVAL;
1388: }
1389:
1390: ret = bdrv_file_open(&bs, backing_file, 0);
1391: if (ret < 0)
1392: return -EIO;
1393:
1394: s = bs->opaque;
1395:
1396: if (!is_snapshot(&s->inode)) {
1.1.1.3 ! root 1397: error_report("cannot clone from a non snapshot vdi");
1.1 root 1398: bdrv_delete(bs);
1399: return -EINVAL;
1400: }
1401:
1.1.1.2 root 1402: base_vid = s->inode.vdi_id;
1.1 root 1403: bdrv_delete(bs);
1404: }
1405:
1.1.1.3 ! root 1406: ret = do_sd_create(vdi, vdi_size, base_vid, &vid, 0, s.addr, s.port);
! 1407: if (!prealloc || ret) {
! 1408: return ret;
! 1409: }
! 1410:
! 1411: return sd_prealloc(filename);
1.1 root 1412: }
1413:
1414: static void sd_close(BlockDriverState *bs)
1415: {
1416: BDRVSheepdogState *s = bs->opaque;
1417: SheepdogVdiReq hdr;
1418: SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr;
1419: unsigned int wlen, rlen = 0;
1420: int fd, ret;
1421:
1422: dprintf("%s\n", s->name);
1423:
1424: fd = connect_to_sdog(s->addr, s->port);
1425: if (fd < 0) {
1426: return;
1427: }
1428:
1429: memset(&hdr, 0, sizeof(hdr));
1430:
1431: hdr.opcode = SD_OP_RELEASE_VDI;
1432: wlen = strlen(s->name) + 1;
1433: hdr.data_length = wlen;
1434: hdr.flags = SD_FLAG_CMD_WRITE;
1435:
1436: ret = do_req(fd, (SheepdogReq *)&hdr, s->name, &wlen, &rlen);
1437:
1438: closesocket(fd);
1439:
1440: if (!ret && rsp->result != SD_RES_SUCCESS &&
1441: rsp->result != SD_RES_VDI_NOT_LOCKED) {
1.1.1.3 ! root 1442: error_report("%s, %s", sd_strerror(rsp->result), s->name);
1.1 root 1443: }
1444:
1445: qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL, NULL, NULL);
1446: closesocket(s->fd);
1447: qemu_free(s->addr);
1448: }
1449:
1450: static int64_t sd_getlength(BlockDriverState *bs)
1451: {
1452: BDRVSheepdogState *s = bs->opaque;
1453:
1454: return s->inode.vdi_size;
1455: }
1456:
1457: static int sd_truncate(BlockDriverState *bs, int64_t offset)
1458: {
1459: BDRVSheepdogState *s = bs->opaque;
1460: int ret, fd;
1461: unsigned int datalen;
1462:
1463: if (offset < s->inode.vdi_size) {
1.1.1.3 ! root 1464: error_report("shrinking is not supported");
1.1 root 1465: return -EINVAL;
1466: } else if (offset > SD_MAX_VDI_SIZE) {
1.1.1.3 ! root 1467: error_report("too big image size");
1.1 root 1468: return -EINVAL;
1469: }
1470:
1471: fd = connect_to_sdog(s->addr, s->port);
1472: if (fd < 0) {
1473: return -EIO;
1474: }
1475:
1476: /* we don't need to update entire object */
1477: datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id);
1478: s->inode.vdi_size = offset;
1479: ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id),
1480: s->inode.nr_copies, datalen, 0, 0);
1481: close(fd);
1482:
1483: if (ret < 0) {
1.1.1.3 ! root 1484: error_report("failed to update an inode.");
1.1 root 1485: return -EIO;
1486: }
1487:
1488: return 0;
1489: }
1490:
1491: /*
1492: * This function is called after writing data objects. If we need to
1493: * update metadata, this sends a write request to the vdi object.
1494: * Otherwise, this calls the AIOCB callback.
1495: */
1496: static void sd_write_done(SheepdogAIOCB *acb)
1497: {
1498: int ret;
1499: BDRVSheepdogState *s = acb->common.bs->opaque;
1500: struct iovec iov;
1501: AIOReq *aio_req;
1502: uint32_t offset, data_len, mn, mx;
1503:
1504: mn = s->min_dirty_data_idx;
1505: mx = s->max_dirty_data_idx;
1506: if (mn <= mx) {
1507: /* we need to update the vdi object. */
1508: offset = sizeof(s->inode) - sizeof(s->inode.data_vdi_id) +
1509: mn * sizeof(s->inode.data_vdi_id[0]);
1510: data_len = (mx - mn + 1) * sizeof(s->inode.data_vdi_id[0]);
1511:
1512: s->min_dirty_data_idx = UINT32_MAX;
1513: s->max_dirty_data_idx = 0;
1514:
1515: iov.iov_base = &s->inode;
1516: iov.iov_len = sizeof(s->inode);
1517: aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id),
1518: data_len, offset, 0, 0, offset);
1519: ret = add_aio_request(s, aio_req, &iov, 1, 0, AIOCB_WRITE_UDATA);
1520: if (ret) {
1521: free_aio_req(s, aio_req);
1522: acb->ret = -EIO;
1523: goto out;
1524: }
1525:
1526: acb->aio_done_func = sd_finish_aiocb;
1527: acb->aiocb_type = AIOCB_WRITE_UDATA;
1528: return;
1529: }
1530: out:
1531: sd_finish_aiocb(acb);
1532: }
1533:
1534: /*
1535: * Create a writable VDI from a snapshot
1536: */
1537: static int sd_create_branch(BDRVSheepdogState *s)
1538: {
1539: int ret, fd;
1540: uint32_t vid;
1541: char *buf;
1542:
1543: dprintf("%" PRIx32 " is snapshot.\n", s->inode.vdi_id);
1544:
1545: buf = qemu_malloc(SD_INODE_SIZE);
1546:
1547: ret = do_sd_create(s->name, s->inode.vdi_size, s->inode.vdi_id, &vid, 1,
1548: s->addr, s->port);
1549: if (ret) {
1550: goto out;
1551: }
1552:
1553: dprintf("%" PRIx32 " is created.\n", vid);
1554:
1555: fd = connect_to_sdog(s->addr, s->port);
1556: if (fd < 0) {
1.1.1.3 ! root 1557: error_report("failed to connect");
1.1 root 1558: goto out;
1559: }
1560:
1561: ret = read_object(fd, buf, vid_to_vdi_oid(vid), s->inode.nr_copies,
1562: SD_INODE_SIZE, 0);
1563:
1564: closesocket(fd);
1565:
1566: if (ret < 0) {
1567: goto out;
1568: }
1569:
1570: memcpy(&s->inode, buf, sizeof(s->inode));
1571:
1572: s->is_snapshot = 0;
1573: ret = 0;
1574: dprintf("%" PRIx32 " was newly created.\n", s->inode.vdi_id);
1575:
1576: out:
1577: qemu_free(buf);
1578:
1579: return ret;
1580: }
1581:
1582: /*
1583: * Send I/O requests to the server.
1584: *
1585: * This function sends requests to the server, links the requests to
1586: * the outstanding_list in BDRVSheepdogState, and exits without
1587: * waiting the response. The responses are received in the
1588: * `aio_read_response' function which is called from the main loop as
1589: * a fd handler.
1590: */
1591: static void sd_readv_writev_bh_cb(void *p)
1592: {
1593: SheepdogAIOCB *acb = p;
1594: int ret = 0;
1595: unsigned long len, done = 0, total = acb->nb_sectors * SECTOR_SIZE;
1596: unsigned long idx = acb->sector_num * SECTOR_SIZE / SD_DATA_OBJ_SIZE;
1597: uint64_t oid;
1598: uint64_t offset = (acb->sector_num * SECTOR_SIZE) % SD_DATA_OBJ_SIZE;
1599: BDRVSheepdogState *s = acb->common.bs->opaque;
1600: SheepdogInode *inode = &s->inode;
1601: AIOReq *aio_req;
1602:
1603: qemu_bh_delete(acb->bh);
1604: acb->bh = NULL;
1605:
1606: if (acb->aiocb_type == AIOCB_WRITE_UDATA && s->is_snapshot) {
1607: /*
1608: * In the case we open the snapshot VDI, Sheepdog creates the
1609: * writable VDI when we do a write operation first.
1610: */
1611: ret = sd_create_branch(s);
1612: if (ret) {
1613: acb->ret = -EIO;
1614: goto out;
1615: }
1616: }
1617:
1618: while (done != total) {
1619: uint8_t flags = 0;
1620: uint64_t old_oid = 0;
1621: int create = 0;
1622:
1623: oid = vid_to_data_oid(inode->data_vdi_id[idx], idx);
1624:
1625: len = MIN(total - done, SD_DATA_OBJ_SIZE - offset);
1626:
1627: if (!inode->data_vdi_id[idx]) {
1628: if (acb->aiocb_type == AIOCB_READ_UDATA) {
1629: goto done;
1630: }
1631:
1632: create = 1;
1633: } else if (acb->aiocb_type == AIOCB_WRITE_UDATA
1.1.1.3 ! root 1634: && !is_data_obj_writable(inode, idx)) {
1.1 root 1635: /* Copy-On-Write */
1636: create = 1;
1637: old_oid = oid;
1638: flags = SD_FLAG_CMD_COW;
1639: }
1640:
1641: if (create) {
1642: dprintf("update ino (%" PRIu32") %" PRIu64 " %" PRIu64
1643: " %" PRIu64 "\n", inode->vdi_id, oid,
1644: vid_to_data_oid(inode->data_vdi_id[idx], idx), idx);
1645: oid = vid_to_data_oid(inode->vdi_id, idx);
1646: dprintf("new oid %lx\n", oid);
1647: }
1648:
1649: aio_req = alloc_aio_req(s, acb, oid, len, offset, flags, old_oid, done);
1650:
1651: if (create) {
1652: AIOReq *areq;
1653: QLIST_FOREACH(areq, &s->outstanding_aio_head,
1654: outstanding_aio_siblings) {
1655: if (areq == aio_req) {
1656: continue;
1657: }
1658: if (areq->oid == oid) {
1659: /*
1660: * Sheepdog cannot handle simultaneous create
1661: * requests to the same object. So we cannot send
1662: * the request until the previous request
1663: * finishes.
1664: */
1665: aio_req->flags = 0;
1666: aio_req->base_oid = 0;
1667: goto done;
1668: }
1669: }
1670: }
1671:
1672: ret = add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov,
1673: create, acb->aiocb_type);
1674: if (ret < 0) {
1.1.1.3 ! root 1675: error_report("add_aio_request is failed");
1.1 root 1676: free_aio_req(s, aio_req);
1677: acb->ret = -EIO;
1678: goto out;
1679: }
1680: done:
1681: offset = 0;
1682: idx++;
1683: done += len;
1684: }
1685: out:
1686: if (QLIST_EMPTY(&acb->aioreq_head)) {
1687: sd_finish_aiocb(acb);
1688: }
1689: }
1690:
1691: static BlockDriverAIOCB *sd_aio_writev(BlockDriverState *bs, int64_t sector_num,
1692: QEMUIOVector *qiov, int nb_sectors,
1693: BlockDriverCompletionFunc *cb,
1694: void *opaque)
1695: {
1696: SheepdogAIOCB *acb;
1697:
1698: if (bs->growable && sector_num + nb_sectors > bs->total_sectors) {
1699: /* TODO: shouldn't block here */
1700: if (sd_truncate(bs, (sector_num + nb_sectors) * SECTOR_SIZE) < 0) {
1701: return NULL;
1702: }
1703: bs->total_sectors = sector_num + nb_sectors;
1704: }
1705:
1706: acb = sd_aio_setup(bs, qiov, sector_num, nb_sectors, cb, opaque);
1707: acb->aio_done_func = sd_write_done;
1708: acb->aiocb_type = AIOCB_WRITE_UDATA;
1709:
1710: sd_schedule_bh(sd_readv_writev_bh_cb, acb);
1711: return &acb->common;
1712: }
1713:
1714: static BlockDriverAIOCB *sd_aio_readv(BlockDriverState *bs, int64_t sector_num,
1715: QEMUIOVector *qiov, int nb_sectors,
1716: BlockDriverCompletionFunc *cb,
1717: void *opaque)
1718: {
1719: SheepdogAIOCB *acb;
1720: int i;
1721:
1722: acb = sd_aio_setup(bs, qiov, sector_num, nb_sectors, cb, opaque);
1723: acb->aiocb_type = AIOCB_READ_UDATA;
1724: acb->aio_done_func = sd_finish_aiocb;
1725:
1726: /*
1727: * TODO: we can do better; we don't need to initialize
1728: * blindly.
1729: */
1730: for (i = 0; i < qiov->niov; i++) {
1731: memset(qiov->iov[i].iov_base, 0, qiov->iov[i].iov_len);
1732: }
1733:
1734: sd_schedule_bh(sd_readv_writev_bh_cb, acb);
1735: return &acb->common;
1736: }
1737:
1738: static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
1739: {
1740: BDRVSheepdogState *s = bs->opaque;
1741: int ret, fd;
1742: uint32_t new_vid;
1743: SheepdogInode *inode;
1744: unsigned int datalen;
1745:
1746: dprintf("sn_info: name %s id_str %s s: name %s vm_state_size %d "
1747: "is_snapshot %d\n", sn_info->name, sn_info->id_str,
1748: s->name, sn_info->vm_state_size, s->is_snapshot);
1749:
1750: if (s->is_snapshot) {
1751: error_report("You can't create a snapshot of a snapshot VDI, "
1.1.1.3 ! root 1752: "%s (%" PRIu32 ").", s->name, s->inode.vdi_id);
1.1 root 1753:
1754: return -EINVAL;
1755: }
1756:
1757: dprintf("%s %s\n", sn_info->name, sn_info->id_str);
1758:
1759: s->inode.vm_state_size = sn_info->vm_state_size;
1760: s->inode.vm_clock_nsec = sn_info->vm_clock_nsec;
1761: strncpy(s->inode.tag, sn_info->name, sizeof(s->inode.tag));
1762: /* we don't need to update entire object */
1763: datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id);
1764:
1765: /* refresh inode. */
1766: fd = connect_to_sdog(s->addr, s->port);
1767: if (fd < 0) {
1768: ret = -EIO;
1769: goto cleanup;
1770: }
1771:
1772: ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id),
1773: s->inode.nr_copies, datalen, 0, 0);
1774: if (ret < 0) {
1.1.1.3 ! root 1775: error_report("failed to write snapshot's inode.");
1.1 root 1776: ret = -EIO;
1777: goto cleanup;
1778: }
1779:
1780: ret = do_sd_create(s->name, s->inode.vdi_size, s->inode.vdi_id, &new_vid, 1,
1781: s->addr, s->port);
1782: if (ret < 0) {
1.1.1.3 ! root 1783: error_report("failed to create inode for snapshot. %s",
1.1 root 1784: strerror(errno));
1785: ret = -EIO;
1786: goto cleanup;
1787: }
1788:
1789: inode = (SheepdogInode *)qemu_malloc(datalen);
1790:
1791: ret = read_object(fd, (char *)inode, vid_to_vdi_oid(new_vid),
1792: s->inode.nr_copies, datalen, 0);
1793:
1794: if (ret < 0) {
1.1.1.3 ! root 1795: error_report("failed to read new inode info. %s", strerror(errno));
1.1 root 1796: ret = -EIO;
1797: goto cleanup;
1798: }
1799:
1800: memcpy(&s->inode, inode, datalen);
1801: dprintf("s->inode: name %s snap_id %x oid %x\n",
1802: s->inode.name, s->inode.snap_id, s->inode.vdi_id);
1803:
1804: cleanup:
1805: closesocket(fd);
1806: return ret;
1807: }
1808:
1809: static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
1810: {
1811: BDRVSheepdogState *s = bs->opaque;
1812: BDRVSheepdogState *old_s;
1813: char vdi[SD_MAX_VDI_LEN], tag[SD_MAX_VDI_TAG_LEN];
1814: char *buf = NULL;
1815: uint32_t vid;
1816: uint32_t snapid = 0;
1817: int ret = -ENOENT, fd;
1818:
1819: old_s = qemu_malloc(sizeof(BDRVSheepdogState));
1820:
1821: memcpy(old_s, s, sizeof(BDRVSheepdogState));
1822:
1823: memset(vdi, 0, sizeof(vdi));
1824: strncpy(vdi, s->name, sizeof(vdi));
1825:
1826: memset(tag, 0, sizeof(tag));
1827: snapid = strtoul(snapshot_id, NULL, 10);
1828: if (!snapid) {
1829: strncpy(tag, s->name, sizeof(tag));
1830: }
1831:
1832: ret = find_vdi_name(s, vdi, snapid, tag, &vid, 1);
1833: if (ret) {
1.1.1.3 ! root 1834: error_report("Failed to find_vdi_name");
1.1 root 1835: ret = -ENOENT;
1836: goto out;
1837: }
1838:
1839: fd = connect_to_sdog(s->addr, s->port);
1840: if (fd < 0) {
1.1.1.3 ! root 1841: error_report("failed to connect");
1.1 root 1842: goto out;
1843: }
1844:
1845: buf = qemu_malloc(SD_INODE_SIZE);
1846: ret = read_object(fd, buf, vid_to_vdi_oid(vid), s->inode.nr_copies,
1847: SD_INODE_SIZE, 0);
1848:
1849: closesocket(fd);
1850:
1851: if (ret) {
1852: ret = -ENOENT;
1853: goto out;
1854: }
1855:
1856: memcpy(&s->inode, buf, sizeof(s->inode));
1857:
1858: if (!s->inode.vm_state_size) {
1.1.1.3 ! root 1859: error_report("Invalid snapshot");
1.1 root 1860: ret = -ENOENT;
1861: goto out;
1862: }
1863:
1864: s->is_snapshot = 1;
1865:
1866: qemu_free(buf);
1867: qemu_free(old_s);
1868:
1869: return 0;
1870: out:
1871: /* recover bdrv_sd_state */
1872: memcpy(s, old_s, sizeof(BDRVSheepdogState));
1873: qemu_free(buf);
1874: qemu_free(old_s);
1875:
1.1.1.3 ! root 1876: error_report("failed to open. recover old bdrv_sd_state.");
1.1 root 1877:
1878: return ret;
1879: }
1880:
1881: static int sd_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
1882: {
1883: /* FIXME: Delete specified snapshot id. */
1884: return 0;
1885: }
1886:
1887: static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
1888: {
1889: BDRVSheepdogState *s = bs->opaque;
1890: SheepdogReq req;
1891: int fd, nr = 1024, ret, max = BITS_TO_LONGS(SD_NR_VDIS) * sizeof(long);
1892: QEMUSnapshotInfo *sn_tab = NULL;
1893: unsigned wlen, rlen;
1894: int found = 0;
1895: static SheepdogInode inode;
1896: unsigned long *vdi_inuse;
1897: unsigned int start_nr;
1898: uint64_t hval;
1899: uint32_t vid;
1900:
1901: vdi_inuse = qemu_malloc(max);
1902:
1903: fd = connect_to_sdog(s->addr, s->port);
1904: if (fd < 0) {
1905: goto out;
1906: }
1907:
1908: rlen = max;
1909: wlen = 0;
1910:
1911: memset(&req, 0, sizeof(req));
1912:
1913: req.opcode = SD_OP_READ_VDIS;
1914: req.data_length = max;
1915:
1916: ret = do_req(fd, (SheepdogReq *)&req, vdi_inuse, &wlen, &rlen);
1917:
1918: closesocket(fd);
1919: if (ret) {
1920: goto out;
1921: }
1922:
1923: sn_tab = qemu_mallocz(nr * sizeof(*sn_tab));
1924:
1925: /* calculate a vdi id with hash function */
1926: hval = fnv_64a_buf(s->name, strlen(s->name), FNV1A_64_INIT);
1927: start_nr = hval & (SD_NR_VDIS - 1);
1928:
1929: fd = connect_to_sdog(s->addr, s->port);
1930: if (fd < 0) {
1.1.1.3 ! root 1931: error_report("failed to connect");
1.1 root 1932: goto out;
1933: }
1934:
1935: for (vid = start_nr; found < nr; vid = (vid + 1) % SD_NR_VDIS) {
1936: if (!test_bit(vid, vdi_inuse)) {
1937: break;
1938: }
1939:
1940: /* we don't need to read entire object */
1941: ret = read_object(fd, (char *)&inode, vid_to_vdi_oid(vid),
1942: 0, SD_INODE_SIZE - sizeof(inode.data_vdi_id), 0);
1943:
1944: if (ret) {
1945: continue;
1946: }
1947:
1948: if (!strcmp(inode.name, s->name) && is_snapshot(&inode)) {
1949: sn_tab[found].date_sec = inode.snap_ctime >> 32;
1950: sn_tab[found].date_nsec = inode.snap_ctime & 0xffffffff;
1951: sn_tab[found].vm_state_size = inode.vm_state_size;
1952: sn_tab[found].vm_clock_nsec = inode.vm_clock_nsec;
1953:
1954: snprintf(sn_tab[found].id_str, sizeof(sn_tab[found].id_str), "%u",
1955: inode.snap_id);
1956: strncpy(sn_tab[found].name, inode.tag,
1957: MIN(sizeof(sn_tab[found].name), sizeof(inode.tag)));
1958: found++;
1959: }
1960: }
1961:
1962: closesocket(fd);
1963: out:
1964: *psn_tab = sn_tab;
1965:
1966: qemu_free(vdi_inuse);
1967:
1968: return found;
1969: }
1970:
1971: static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data,
1972: int64_t pos, int size, int load)
1973: {
1974: int fd, create;
1975: int ret = 0;
1976: unsigned int data_len;
1977: uint64_t vmstate_oid;
1978: uint32_t vdi_index;
1979: uint64_t offset;
1980:
1981: fd = connect_to_sdog(s->addr, s->port);
1982: if (fd < 0) {
1983: ret = -EIO;
1984: goto cleanup;
1985: }
1986:
1987: while (size) {
1988: vdi_index = pos / SD_DATA_OBJ_SIZE;
1989: offset = pos % SD_DATA_OBJ_SIZE;
1990:
1991: data_len = MIN(size, SD_DATA_OBJ_SIZE);
1992:
1993: vmstate_oid = vid_to_vmstate_oid(s->inode.vdi_id, vdi_index);
1994:
1995: create = (offset == 0);
1996: if (load) {
1997: ret = read_object(fd, (char *)data, vmstate_oid,
1998: s->inode.nr_copies, data_len, offset);
1999: } else {
2000: ret = write_object(fd, (char *)data, vmstate_oid,
2001: s->inode.nr_copies, data_len, offset, create);
2002: }
2003:
2004: if (ret < 0) {
1.1.1.3 ! root 2005: error_report("failed to save vmstate %s", strerror(errno));
1.1 root 2006: ret = -EIO;
2007: goto cleanup;
2008: }
2009:
2010: pos += data_len;
2011: size -= data_len;
2012: ret += data_len;
2013: }
2014: cleanup:
2015: closesocket(fd);
2016: return ret;
2017: }
2018:
2019: static int sd_save_vmstate(BlockDriverState *bs, const uint8_t *data,
2020: int64_t pos, int size)
2021: {
2022: BDRVSheepdogState *s = bs->opaque;
2023:
2024: return do_load_save_vmstate(s, (uint8_t *)data, pos, size, 0);
2025: }
2026:
2027: static int sd_load_vmstate(BlockDriverState *bs, uint8_t *data,
2028: int64_t pos, int size)
2029: {
2030: BDRVSheepdogState *s = bs->opaque;
2031:
2032: return do_load_save_vmstate(s, data, pos, size, 1);
2033: }
2034:
2035:
2036: static QEMUOptionParameter sd_create_options[] = {
2037: {
2038: .name = BLOCK_OPT_SIZE,
2039: .type = OPT_SIZE,
2040: .help = "Virtual disk size"
2041: },
2042: {
2043: .name = BLOCK_OPT_BACKING_FILE,
2044: .type = OPT_STRING,
2045: .help = "File name of a base image"
2046: },
1.1.1.3 ! root 2047: {
! 2048: .name = BLOCK_OPT_PREALLOC,
! 2049: .type = OPT_STRING,
! 2050: .help = "Preallocation mode (allowed values: off, full)"
! 2051: },
1.1 root 2052: { NULL }
2053: };
2054:
2055: BlockDriver bdrv_sheepdog = {
2056: .format_name = "sheepdog",
2057: .protocol_name = "sheepdog",
2058: .instance_size = sizeof(BDRVSheepdogState),
2059: .bdrv_file_open = sd_open,
2060: .bdrv_close = sd_close,
2061: .bdrv_create = sd_create,
2062: .bdrv_getlength = sd_getlength,
2063: .bdrv_truncate = sd_truncate,
2064:
2065: .bdrv_aio_readv = sd_aio_readv,
2066: .bdrv_aio_writev = sd_aio_writev,
2067:
2068: .bdrv_snapshot_create = sd_snapshot_create,
2069: .bdrv_snapshot_goto = sd_snapshot_goto,
2070: .bdrv_snapshot_delete = sd_snapshot_delete,
2071: .bdrv_snapshot_list = sd_snapshot_list,
2072:
2073: .bdrv_save_vmstate = sd_save_vmstate,
2074: .bdrv_load_vmstate = sd_load_vmstate,
2075:
2076: .create_options = sd_create_options,
2077: };
2078:
2079: static void bdrv_sheepdog_init(void)
2080: {
2081: bdrv_register(&bdrv_sheepdog);
2082: }
2083: block_init(bdrv_sheepdog_init);
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.