Annotation of qemu/block/nbd.c, revision 1.1.1.6

1.1       root        1: /*
                      2:  * QEMU Block driver for  NBD
                      3:  *
                      4:  * Copyright (C) 2008 Bull S.A.S.
                      5:  *     Author: Laurent Vivier <[email protected]>
                      6:  *
                      7:  * Some parts:
                      8:  *    Copyright (C) 2007 Anthony Liguori <[email protected]>
                      9:  *
                     10:  * Permission is hereby granted, free of charge, to any person obtaining a copy
                     11:  * of this software and associated documentation files (the "Software"), to deal
                     12:  * in the Software without restriction, including without limitation the rights
                     13:  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
                     14:  * copies of the Software, and to permit persons to whom the Software is
                     15:  * furnished to do so, subject to the following conditions:
                     16:  *
                     17:  * The above copyright notice and this permission notice shall be included in
                     18:  * all copies or substantial portions of the Software.
                     19:  *
                     20:  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
                     21:  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
                     22:  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
                     23:  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
                     24:  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
                     25:  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
                     26:  * THE SOFTWARE.
                     27:  */
                     28: 
                     29: #include "qemu-common.h"
                     30: #include "nbd.h"
1.1.1.5   root       31: #include "block_int.h"
1.1       root       32: #include "module.h"
1.1.1.4   root       33: #include "qemu_socket.h"
1.1       root       34: 
                     35: #include <sys/types.h>
                     36: #include <unistd.h>
                     37: 
/* Separator that introduces the optional export name in an "nbd:" filename. */
#define EN_OPTSTR ":exportname="

/* #define DEBUG_NBD */

/* Debug trace to stderr, prefixed with the name of the calling function.
 * Expands to a no-op expression unless DEBUG_NBD is defined. */
#if defined(DEBUG_NBD)
#define logout(fmt, ...) \
                fprintf(stderr, "nbd\t%-24s" fmt, __func__, ##__VA_ARGS__)
#else
#define logout(fmt, ...) ((void)0)
#endif

/* Number of entries in the per-connection recv_coroutine[] slot table. */
#define MAX_NBD_REQUESTS       16

/* A request handle is the slot index XORed with the address of the owning
 * state object; applying the same XOR to a handle yields the index again.
 * The pointer argument is parenthesized so that an expression argument
 * (e.g. "p + 1") is cast as a whole rather than only its first operand. */
#define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)(bs)))
#define INDEX_TO_HANDLE(bs, index)  ((index)  ^ ((uint64_t)(intptr_t)(bs)))
        !            52: 
1.1       root       53: typedef struct BDRVNBDState {
                     54:     int sock;
1.1.1.5   root       55:     uint32_t nbdflags;
1.1       root       56:     off_t size;
                     57:     size_t blocksize;
1.1.1.4   root       58:     char *export_name; /* An NBD server may export several devices */
                     59: 
1.1.1.6 ! root       60:     CoMutex send_mutex;
        !            61:     CoMutex free_sema;
        !            62:     Coroutine *send_coroutine;
        !            63:     int in_flight;
        !            64: 
        !            65:     Coroutine *recv_coroutine[MAX_NBD_REQUESTS];
        !            66:     struct nbd_reply reply;
        !            67: 
1.1.1.4   root       68:     /* If it begins with  '/', this is a UNIX domain socket. Otherwise,
                     69:      * it's a string of the form <hostname|ip4|\[ip6\]>:port
                     70:      */
                     71:     char *host_spec;
1.1       root       72: } BDRVNBDState;
                     73: 
1.1.1.4   root       74: static int nbd_config(BDRVNBDState *s, const char *filename, int flags)
1.1       root       75: {
1.1.1.3   root       76:     char *file;
1.1.1.4   root       77:     char *export_name;
                     78:     const char *host_spec;
1.1       root       79:     const char *unixpath;
1.1.1.3   root       80:     int err = -EINVAL;
                     81: 
1.1.1.5   root       82:     file = g_strdup(filename);
1.1       root       83: 
1.1.1.4   root       84:     export_name = strstr(file, EN_OPTSTR);
                     85:     if (export_name) {
                     86:         if (export_name[strlen(EN_OPTSTR)] == 0) {
1.1.1.3   root       87:             goto out;
                     88:         }
1.1.1.4   root       89:         export_name[0] = 0; /* truncate 'file' */
                     90:         export_name += strlen(EN_OPTSTR);
1.1.1.5   root       91:         s->export_name = g_strdup(export_name);
1.1.1.3   root       92:     }
                     93: 
1.1.1.4   root       94:     /* extract the host_spec - fail if it's not nbd:... */
                     95:     if (!strstart(file, "nbd:", &host_spec)) {
1.1.1.3   root       96:         goto out;
                     97:     }
1.1       root       98: 
1.1.1.4   root       99:     /* are we a UNIX or TCP socket? */
                    100:     if (strstart(host_spec, "unix:", &unixpath)) {
                    101:         if (unixpath[0] != '/') { /* We demand  an absolute path*/
1.1.1.3   root      102:             goto out;
                    103:         }
1.1.1.5   root      104:         s->host_spec = g_strdup(unixpath);
1.1.1.4   root      105:     } else {
1.1.1.5   root      106:         s->host_spec = g_strdup(host_spec);
1.1.1.4   root      107:     }
1.1       root      108: 
1.1.1.4   root      109:     err = 0;
1.1       root      110: 
1.1.1.4   root      111: out:
1.1.1.5   root      112:     g_free(file);
1.1.1.4   root      113:     if (err != 0) {
1.1.1.5   root      114:         g_free(s->export_name);
                    115:         g_free(s->host_spec);
1.1.1.4   root      116:     }
                    117:     return err;
                    118: }
1.1       root      119: 
1.1.1.6 ! root      120: static void nbd_coroutine_start(BDRVNBDState *s, struct nbd_request *request)
        !           121: {
        !           122:     int i;
        !           123: 
        !           124:     /* Poor man semaphore.  The free_sema is locked when no other request
        !           125:      * can be accepted, and unlocked after receiving one reply.  */
        !           126:     if (s->in_flight >= MAX_NBD_REQUESTS - 1) {
        !           127:         qemu_co_mutex_lock(&s->free_sema);
        !           128:         assert(s->in_flight < MAX_NBD_REQUESTS);
        !           129:     }
        !           130:     s->in_flight++;
        !           131: 
        !           132:     for (i = 0; i < MAX_NBD_REQUESTS; i++) {
        !           133:         if (s->recv_coroutine[i] == NULL) {
        !           134:             s->recv_coroutine[i] = qemu_coroutine_self();
        !           135:             break;
        !           136:         }
        !           137:     }
        !           138: 
        !           139:     assert(i < MAX_NBD_REQUESTS);
        !           140:     request->handle = INDEX_TO_HANDLE(s, i);
        !           141: }
        !           142: 
        !           143: static int nbd_have_request(void *opaque)
        !           144: {
        !           145:     BDRVNBDState *s = opaque;
        !           146: 
        !           147:     return s->in_flight > 0;
        !           148: }
        !           149: 
        !           150: static void nbd_reply_ready(void *opaque)
        !           151: {
        !           152:     BDRVNBDState *s = opaque;
        !           153:     uint64_t i;
        !           154:     int ret;
        !           155: 
        !           156:     if (s->reply.handle == 0) {
        !           157:         /* No reply already in flight.  Fetch a header.  It is possible
        !           158:          * that another thread has done the same thing in parallel, so
        !           159:          * the socket is not readable anymore.
        !           160:          */
        !           161:         ret = nbd_receive_reply(s->sock, &s->reply);
        !           162:         if (ret == -EAGAIN) {
        !           163:             return;
        !           164:         }
        !           165:         if (ret < 0) {
        !           166:             s->reply.handle = 0;
        !           167:             goto fail;
        !           168:         }
        !           169:     }
        !           170: 
        !           171:     /* There's no need for a mutex on the receive side, because the
        !           172:      * handler acts as a synchronization point and ensures that only
        !           173:      * one coroutine is called until the reply finishes.  */
        !           174:     i = HANDLE_TO_INDEX(s, s->reply.handle);
        !           175:     if (i >= MAX_NBD_REQUESTS) {
        !           176:         goto fail;
        !           177:     }
        !           178: 
        !           179:     if (s->recv_coroutine[i]) {
        !           180:         qemu_coroutine_enter(s->recv_coroutine[i], NULL);
        !           181:         return;
        !           182:     }
        !           183: 
        !           184: fail:
        !           185:     for (i = 0; i < MAX_NBD_REQUESTS; i++) {
        !           186:         if (s->recv_coroutine[i]) {
        !           187:             qemu_coroutine_enter(s->recv_coroutine[i], NULL);
        !           188:         }
        !           189:     }
        !           190: }
        !           191: 
        !           192: static void nbd_restart_write(void *opaque)
        !           193: {
        !           194:     BDRVNBDState *s = opaque;
        !           195:     qemu_coroutine_enter(s->send_coroutine, NULL);
        !           196: }
        !           197: 
        !           198: static int nbd_co_send_request(BDRVNBDState *s, struct nbd_request *request,
        !           199:                                struct iovec *iov, int offset)
        !           200: {
        !           201:     int rc, ret;
        !           202: 
        !           203:     qemu_co_mutex_lock(&s->send_mutex);
        !           204:     s->send_coroutine = qemu_coroutine_self();
        !           205:     qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, nbd_restart_write,
        !           206:                             nbd_have_request, s);
        !           207:     rc = nbd_send_request(s->sock, request);
        !           208:     if (rc >= 0 && iov) {
        !           209:         ret = qemu_co_sendv(s->sock, iov, request->len, offset);
        !           210:         if (ret != request->len) {
        !           211:             return -EIO;
        !           212:         }
        !           213:     }
        !           214:     qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, NULL,
        !           215:                             nbd_have_request, s);
        !           216:     s->send_coroutine = NULL;
        !           217:     qemu_co_mutex_unlock(&s->send_mutex);
        !           218:     return rc;
        !           219: }
        !           220: 
        !           221: static void nbd_co_receive_reply(BDRVNBDState *s, struct nbd_request *request,
        !           222:                                  struct nbd_reply *reply,
        !           223:                                  struct iovec *iov, int offset)
        !           224: {
        !           225:     int ret;
        !           226: 
        !           227:     /* Wait until we're woken up by the read handler.  TODO: perhaps
        !           228:      * peek at the next reply and avoid yielding if it's ours?  */
        !           229:     qemu_coroutine_yield();
        !           230:     *reply = s->reply;
        !           231:     if (reply->handle != request->handle) {
        !           232:         reply->error = EIO;
        !           233:     } else {
        !           234:         if (iov && reply->error == 0) {
        !           235:             ret = qemu_co_recvv(s->sock, iov, request->len, offset);
        !           236:             if (ret != request->len) {
        !           237:                 reply->error = EIO;
        !           238:             }
        !           239:         }
        !           240: 
        !           241:         /* Tell the read handler to read another header.  */
        !           242:         s->reply.handle = 0;
        !           243:     }
        !           244: }
        !           245: 
        !           246: static void nbd_coroutine_end(BDRVNBDState *s, struct nbd_request *request)
        !           247: {
        !           248:     int i = HANDLE_TO_INDEX(s, request->handle);
        !           249:     s->recv_coroutine[i] = NULL;
        !           250:     if (s->in_flight-- == MAX_NBD_REQUESTS) {
        !           251:         qemu_co_mutex_unlock(&s->free_sema);
        !           252:     }
        !           253: }
        !           254: 
1.1.1.4   root      255: static int nbd_establish_connection(BlockDriverState *bs)
                    256: {
                    257:     BDRVNBDState *s = bs->opaque;
                    258:     int sock;
                    259:     int ret;
                    260:     off_t size;
                    261:     size_t blocksize;
                    262: 
                    263:     if (s->host_spec[0] == '/') {
                    264:         sock = unix_socket_outgoing(s->host_spec);
                    265:     } else {
                    266:         sock = tcp_socket_outgoing_spec(s->host_spec);
1.1       root      267:     }
                    268: 
1.1.1.4   root      269:     /* Failed to establish connection */
1.1.1.6 ! root      270:     if (sock < 0) {
1.1.1.4   root      271:         logout("Failed to establish connection to NBD server\n");
                    272:         return -errno;
1.1.1.3   root      273:     }
1.1       root      274: 
1.1.1.4   root      275:     /* NBD handshake */
1.1.1.5   root      276:     ret = nbd_receive_negotiate(sock, s->export_name, &s->nbdflags, &size,
1.1.1.4   root      277:                                 &blocksize);
1.1.1.6 ! root      278:     if (ret < 0) {
1.1.1.4   root      279:         logout("Failed to negotiate with the NBD server\n");
                    280:         closesocket(sock);
1.1.1.6 ! root      281:         return ret;
1.1.1.3   root      282:     }
1.1       root      283: 
1.1.1.6 ! root      284:     /* Now that we're connected, set the socket to be non-blocking and
        !           285:      * kick the reply mechanism.  */
1.1.1.4   root      286:     socket_set_nonblock(sock);
1.1.1.6 ! root      287:     qemu_aio_set_fd_handler(sock, nbd_reply_ready, NULL,
        !           288:                             nbd_have_request, s);
1.1.1.4   root      289: 
1.1       root      290:     s->sock = sock;
                    291:     s->size = size;
                    292:     s->blocksize = blocksize;
                    293: 
1.1.1.4   root      294:     logout("Established connection with NBD server\n");
                    295:     return 0;
                    296: }
                    297: 
                    298: static void nbd_teardown_connection(BlockDriverState *bs)
                    299: {
                    300:     BDRVNBDState *s = bs->opaque;
                    301:     struct nbd_request request;
                    302: 
                    303:     request.type = NBD_CMD_DISC;
                    304:     request.from = 0;
                    305:     request.len = 0;
                    306:     nbd_send_request(s->sock, &request);
                    307: 
1.1.1.6 ! root      308:     qemu_aio_set_fd_handler(s->sock, NULL, NULL, NULL, NULL);
1.1.1.4   root      309:     closesocket(s->sock);
                    310: }
                    311: 
                    312: static int nbd_open(BlockDriverState *bs, const char* filename, int flags)
                    313: {
                    314:     BDRVNBDState *s = bs->opaque;
                    315:     int result;
                    316: 
1.1.1.6 ! root      317:     qemu_co_mutex_init(&s->send_mutex);
        !           318:     qemu_co_mutex_init(&s->free_sema);
        !           319: 
1.1.1.4   root      320:     /* Pop the config into our state object. Exit if invalid. */
                    321:     result = nbd_config(s, filename, flags);
                    322:     if (result != 0) {
                    323:         return result;
                    324:     }
                    325: 
                    326:     /* establish TCP connection, return error if it fails
                    327:      * TODO: Configurable retry-until-timeout behaviour.
                    328:      */
                    329:     result = nbd_establish_connection(bs);
                    330: 
                    331:     return result;
1.1       root      332: }
                    333: 
1.1.1.6 ! root      334: static int nbd_co_readv_1(BlockDriverState *bs, int64_t sector_num,
        !           335:                           int nb_sectors, QEMUIOVector *qiov,
        !           336:                           int offset)
1.1       root      337: {
                    338:     BDRVNBDState *s = bs->opaque;
                    339:     struct nbd_request request;
                    340:     struct nbd_reply reply;
1.1.1.6 ! root      341:     ssize_t ret;
1.1       root      342: 
                    343:     request.type = NBD_CMD_READ;
1.1.1.6 ! root      344:     request.from = sector_num * 512;
1.1       root      345:     request.len = nb_sectors * 512;
                    346: 
1.1.1.6 ! root      347:     nbd_coroutine_start(s, &request);
        !           348:     ret = nbd_co_send_request(s, &request, NULL, 0);
        !           349:     if (ret < 0) {
        !           350:         reply.error = -ret;
        !           351:     } else {
        !           352:         nbd_co_receive_reply(s, &request, &reply, qiov->iov, offset);
        !           353:     }
        !           354:     nbd_coroutine_end(s, &request);
        !           355:     return -reply.error;
1.1       root      356: 
                    357: }
                    358: 
1.1.1.6 ! root      359: static int nbd_co_writev_1(BlockDriverState *bs, int64_t sector_num,
        !           360:                            int nb_sectors, QEMUIOVector *qiov,
        !           361:                            int offset)
1.1       root      362: {
                    363:     BDRVNBDState *s = bs->opaque;
                    364:     struct nbd_request request;
                    365:     struct nbd_reply reply;
1.1.1.6 ! root      366:     ssize_t ret;
1.1       root      367: 
                    368:     request.type = NBD_CMD_WRITE;
1.1.1.6 ! root      369:     if (!bdrv_enable_write_cache(bs) && (s->nbdflags & NBD_FLAG_SEND_FUA)) {
        !           370:         request.type |= NBD_CMD_FLAG_FUA;
        !           371:     }
1.1       root      372: 
1.1.1.6 ! root      373:     request.from = sector_num * 512;
        !           374:     request.len = nb_sectors * 512;
1.1       root      375: 
1.1.1.6 ! root      376:     nbd_coroutine_start(s, &request);
        !           377:     ret = nbd_co_send_request(s, &request, qiov->iov, offset);
        !           378:     if (ret < 0) {
        !           379:         reply.error = -ret;
        !           380:     } else {
        !           381:         nbd_co_receive_reply(s, &request, &reply, NULL, 0);
        !           382:     }
        !           383:     nbd_coroutine_end(s, &request);
        !           384:     return -reply.error;
        !           385: }
1.1       root      386: 
1.1.1.6 ! root      387: /* qemu-nbd has a limit of slightly less than 1M per request.  Try to
        !           388:  * remain aligned to 4K. */
        !           389: #define NBD_MAX_SECTORS 2040
1.1       root      390: 
1.1.1.6 ! root      391: static int nbd_co_readv(BlockDriverState *bs, int64_t sector_num,
        !           392:                         int nb_sectors, QEMUIOVector *qiov)
        !           393: {
        !           394:     int offset = 0;
        !           395:     int ret;
        !           396:     while (nb_sectors > NBD_MAX_SECTORS) {
        !           397:         ret = nbd_co_readv_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset);
        !           398:         if (ret < 0) {
        !           399:             return ret;
        !           400:         }
        !           401:         offset += NBD_MAX_SECTORS * 512;
        !           402:         sector_num += NBD_MAX_SECTORS;
        !           403:         nb_sectors -= NBD_MAX_SECTORS;
        !           404:     }
        !           405:     return nbd_co_readv_1(bs, sector_num, nb_sectors, qiov, offset);
1.1       root      406: }
                    407: 
1.1.1.6 ! root      408: static int nbd_co_writev(BlockDriverState *bs, int64_t sector_num,
        !           409:                          int nb_sectors, QEMUIOVector *qiov)
1.1.1.5   root      410: {
1.1.1.6 ! root      411:     int offset = 0;
1.1.1.5   root      412:     int ret;
1.1.1.6 ! root      413:     while (nb_sectors > NBD_MAX_SECTORS) {
        !           414:         ret = nbd_co_writev_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset);
        !           415:         if (ret < 0) {
        !           416:             return ret;
        !           417:         }
        !           418:         offset += NBD_MAX_SECTORS * 512;
        !           419:         sector_num += NBD_MAX_SECTORS;
        !           420:         nb_sectors -= NBD_MAX_SECTORS;
        !           421:     }
        !           422:     return nbd_co_writev_1(bs, sector_num, nb_sectors, qiov, offset);
        !           423: }
        !           424: 
        !           425: static int nbd_co_flush(BlockDriverState *bs)
        !           426: {
1.1.1.5   root      427:     BDRVNBDState *s = bs->opaque;
1.1.1.6 ! root      428:     struct nbd_request request;
        !           429:     struct nbd_reply reply;
        !           430:     ssize_t ret;
        !           431: 
        !           432:     if (!(s->nbdflags & NBD_FLAG_SEND_FLUSH)) {
        !           433:         return 0;
        !           434:     }
        !           435: 
        !           436:     request.type = NBD_CMD_FLUSH;
        !           437:     if (s->nbdflags & NBD_FLAG_SEND_FUA) {
        !           438:         request.type |= NBD_CMD_FLAG_FUA;
        !           439:     }
        !           440: 
        !           441:     request.from = 0;
        !           442:     request.len = 0;
        !           443: 
        !           444:     nbd_coroutine_start(s, &request);
        !           445:     ret = nbd_co_send_request(s, &request, NULL, 0);
        !           446:     if (ret < 0) {
        !           447:         reply.error = -ret;
        !           448:     } else {
        !           449:         nbd_co_receive_reply(s, &request, &reply, NULL, 0);
        !           450:     }
        !           451:     nbd_coroutine_end(s, &request);
        !           452:     return -reply.error;
1.1.1.5   root      453: }
                    454: 
1.1.1.6 ! root      455: static int nbd_co_discard(BlockDriverState *bs, int64_t sector_num,
        !           456:                           int nb_sectors)
1.1.1.5   root      457: {
                    458:     BDRVNBDState *s = bs->opaque;
1.1.1.6 ! root      459:     struct nbd_request request;
        !           460:     struct nbd_reply reply;
        !           461:     ssize_t ret;
        !           462: 
        !           463:     if (!(s->nbdflags & NBD_FLAG_SEND_TRIM)) {
        !           464:         return 0;
        !           465:     }
        !           466:     request.type = NBD_CMD_TRIM;
        !           467:     request.from = sector_num * 512;;
        !           468:     request.len = nb_sectors * 512;
        !           469: 
        !           470:     nbd_coroutine_start(s, &request);
        !           471:     ret = nbd_co_send_request(s, &request, NULL, 0);
        !           472:     if (ret < 0) {
        !           473:         reply.error = -ret;
        !           474:     } else {
        !           475:         nbd_co_receive_reply(s, &request, &reply, NULL, 0);
        !           476:     }
        !           477:     nbd_coroutine_end(s, &request);
        !           478:     return -reply.error;
1.1.1.5   root      479: }
                    480: 
1.1       root      481: static void nbd_close(BlockDriverState *bs)
                    482: {
                    483:     BDRVNBDState *s = bs->opaque;
1.1.1.5   root      484:     g_free(s->export_name);
                    485:     g_free(s->host_spec);
1.1       root      486: 
1.1.1.4   root      487:     nbd_teardown_connection(bs);
1.1       root      488: }
                    489: 
                    490: static int64_t nbd_getlength(BlockDriverState *bs)
                    491: {
                    492:     BDRVNBDState *s = bs->opaque;
                    493: 
                    494:     return s->size;
                    495: }
                    496: 
                    497: static BlockDriver bdrv_nbd = {
1.1.1.6 ! root      498:     .format_name         = "nbd",
        !           499:     .instance_size       = sizeof(BDRVNBDState),
        !           500:     .bdrv_file_open      = nbd_open,
        !           501:     .bdrv_co_readv       = nbd_co_readv,
        !           502:     .bdrv_co_writev      = nbd_co_writev,
        !           503:     .bdrv_close          = nbd_close,
        !           504:     .bdrv_co_flush_to_os = nbd_co_flush,
        !           505:     .bdrv_co_discard     = nbd_co_discard,
        !           506:     .bdrv_getlength      = nbd_getlength,
        !           507:     .protocol_name       = "nbd",
1.1       root      508: };
                    509: 
                    510: static void bdrv_nbd_init(void)
                    511: {
                    512:     bdrv_register(&bdrv_nbd);
                    513: }
                    514: 
                    515: block_init(bdrv_nbd_init);

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.