qemu/hw/xen_disk.c, revision 1.1.1.3

/*
 *  xen paravirt block device backend
 *
 *  (c) Gerd Hoffmann <kraxel@redhat.com>
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; under version 2 of the License.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <unistd.h>
#include <signal.h>
#include <inttypes.h>
#include <time.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <sys/uio.h>

#include <xs.h>
#include <xenctrl.h>
#include <xen/io/xenbus.h>

#include "hw.h"
#include "block_int.h"
#include "qemu-char.h"
#include "xen_blkif.h"
#include "xen_backend.h"

/* ------------------------------------------------------------- */

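/*
 * Tunables:
 *   syncwrite    - flush the image after every write instead of relying
 *                  on write-barrier requests only
 *   batch_maps   - map all grant refs of a request with a single
 *                  xc_gnttab_map_grant_refs() call
 *   max_requests - upper bound on ioreq structs allocated per device
 *   use_aio      - submit I/O through the qemu aio layer instead of the
 *                  synchronous bdrv_read()/bdrv_write() path
 */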
static int syncwrite    = 0;
static int batch_maps   = 0;

static int max_requests = 32;
static int use_aio      = 1;

/* ------------------------------------------------------------- */

#define BLOCK_SIZE  512
#define IOCB_COUNT  (BLKIF_MAX_SEGMENTS_PER_REQUEST + 2)

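/*
 * State for one blkif request, kept from the moment it is taken off the
 * shared ring until the response has been pushed back: the parsed
 * request, the iovec pointing into the mapped grant pages, and the
 * aio completion bookkeeping.
 */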
struct ioreq {
    blkif_request_t     req;
    int16_t             status;

    /* parsed request */
    off_t               start;
    QEMUIOVector        v;
    int                 presync;
    int                 postsync;

    /* grant mapping */
    uint32_t            domids[BLKIF_MAX_SEGMENTS_PER_REQUEST];
    uint32_t            refs[BLKIF_MAX_SEGMENTS_PER_REQUEST];
    int                 prot;
    void                *page[BLKIF_MAX_SEGMENTS_PER_REQUEST];
    void                *pages;

    /* aio status */
    int                 aio_inflight;
    int                 aio_errors;

    struct XenBlkDev    *blkdev;
    QLIST_ENTRY(ioreq)   list;
};

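/*
 * Per-device backend state: the xenstore configuration, the mapped
 * shared ring, the qemu block driver backing the image, and the
 * inflight/finished/freelist request pools.
 */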
struct XenBlkDev {
    struct XenDevice    xendev;  /* must be first */
    char                *params;
    char                *mode;
    char                *type;
    char                *dev;
    char                *devtype;
    const char          *fileproto;
    const char          *filename;
    int                 ring_ref;
    void                *sring;
    int64_t             file_blk;
    int64_t             file_size;
    int                 protocol;
    blkif_back_rings_t  rings;
    int                 more_work;
    int                 cnt_map;

    /* request lists */
    QLIST_HEAD(inflight_head, ioreq) inflight;
    QLIST_HEAD(finished_head, ioreq) finished;
    QLIST_HEAD(freelist_head, ioreq) freelist;
    int                 requests_total;
    int                 requests_inflight;
    int                 requests_finished;

    /* qemu block driver */
    DriveInfo           *dinfo;
    BlockDriverState    *bs;
    QEMUBH              *bh;
};

/* ------------------------------------------------------------- */

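/*
 * ioreq pool management: ioreq_start() takes a request struct off the
 * freelist (allocating a new one while requests_total < max_requests)
 * and puts it on the inflight list; ioreq_finish() moves it to the
 * finished list once I/O is done; ioreq_release() wipes it and returns
 * it to the freelist after the response has been sent.
 */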
static struct ioreq *ioreq_start(struct XenBlkDev *blkdev)
{
    struct ioreq *ioreq = NULL;

    if (QLIST_EMPTY(&blkdev->freelist)) {
        if (blkdev->requests_total >= max_requests)
            goto out;
        /* allocate new struct */
        ioreq = qemu_mallocz(sizeof(*ioreq));
        ioreq->blkdev = blkdev;
        blkdev->requests_total++;
        qemu_iovec_init(&ioreq->v, BLKIF_MAX_SEGMENTS_PER_REQUEST);
    } else {
        /* get one from freelist */
        ioreq = QLIST_FIRST(&blkdev->freelist);
        QLIST_REMOVE(ioreq, list);
        qemu_iovec_reset(&ioreq->v);
    }
    QLIST_INSERT_HEAD(&blkdev->inflight, ioreq, list);
    blkdev->requests_inflight++;

out:
    return ioreq;
}

static void ioreq_finish(struct ioreq *ioreq)
{
    struct XenBlkDev *blkdev = ioreq->blkdev;

    QLIST_REMOVE(ioreq, list);
    QLIST_INSERT_HEAD(&blkdev->finished, ioreq, list);
    blkdev->requests_inflight--;
    blkdev->requests_finished++;
}

static void ioreq_release(struct ioreq *ioreq)
{
    struct XenBlkDev *blkdev = ioreq->blkdev;

    QLIST_REMOVE(ioreq, list);
    memset(ioreq, 0, sizeof(*ioreq));
    ioreq->blkdev = blkdev;
    QLIST_INSERT_HEAD(&blkdev->freelist, ioreq, list);
    blkdev->requests_finished--;
}

/*
 * translate request into iovec + start offset
 * do sanity checks along the way
 */
static int ioreq_parse(struct ioreq *ioreq)
{
    struct XenBlkDev *blkdev = ioreq->blkdev;
    uintptr_t mem;
    size_t len;
    int i;

    xen_be_printf(&blkdev->xendev, 3,
                  "op %d, nr %d, handle %d, id %" PRId64 ", sector %" PRId64 "\n",
                  ioreq->req.operation, ioreq->req.nr_segments,
                  ioreq->req.handle, ioreq->req.id, ioreq->req.sector_number);
    switch (ioreq->req.operation) {
    case BLKIF_OP_READ:
        ioreq->prot = PROT_WRITE; /* to memory */
        break;
    case BLKIF_OP_WRITE_BARRIER:
        if (!syncwrite)
            ioreq->presync = ioreq->postsync = 1;
        /* fall through */
    case BLKIF_OP_WRITE:
        ioreq->prot = PROT_READ; /* from memory */
        if (syncwrite)
            ioreq->postsync = 1;
        break;
    default:
        xen_be_printf(&blkdev->xendev, 0, "error: unknown operation (%d)\n",
                      ioreq->req.operation);
        goto err;
    }

    if (ioreq->req.operation != BLKIF_OP_READ && blkdev->mode[0] != 'w') {
        xen_be_printf(&blkdev->xendev, 0, "error: write req for ro device\n");
        goto err;
    }

    ioreq->start = ioreq->req.sector_number * blkdev->file_blk;
    for (i = 0; i < ioreq->req.nr_segments; i++) {
        if (i == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
            xen_be_printf(&blkdev->xendev, 0, "error: nr_segments too big\n");
            goto err;
        }
        if (ioreq->req.seg[i].first_sect > ioreq->req.seg[i].last_sect) {
            xen_be_printf(&blkdev->xendev, 0, "error: first > last sector\n");
            goto err;
        }
        if (ioreq->req.seg[i].last_sect * BLOCK_SIZE >= XC_PAGE_SIZE) {
            xen_be_printf(&blkdev->xendev, 0, "error: page crossing\n");
            goto err;
        }

        ioreq->domids[i] = blkdev->xendev.dom;
        ioreq->refs[i]   = ioreq->req.seg[i].gref;

        mem = ioreq->req.seg[i].first_sect * blkdev->file_blk;
        len = (ioreq->req.seg[i].last_sect - ioreq->req.seg[i].first_sect + 1) * blkdev->file_blk;
        qemu_iovec_add(&ioreq->v, (void*)mem, len);
    }
    if (ioreq->start + ioreq->v.size > blkdev->file_size) {
        xen_be_printf(&blkdev->xendev, 0, "error: access beyond end of file\n");
        goto err;
    }
    return 0;

err:
    ioreq->status = BLKIF_RSP_ERROR;
    return -1;
}

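/*
 * Grant mapping: ioreq_map() maps the guest's grant references into the
 * backend's address space (one batched call when batch_maps is set,
 * otherwise one call per segment) and rewrites the iovec base pointers
 * to point into the mapped pages; ioreq_unmap() undoes the mapping once
 * the I/O has completed.
 */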
static void ioreq_unmap(struct ioreq *ioreq)
{
    int gnt = ioreq->blkdev->xendev.gnttabdev;
    int i;

    if (ioreq->v.niov == 0)
        return;
    if (batch_maps) {
        if (!ioreq->pages)
            return;
        if (xc_gnttab_munmap(gnt, ioreq->pages, ioreq->v.niov) != 0)
            xen_be_printf(&ioreq->blkdev->xendev, 0, "xc_gnttab_munmap failed: %s\n",
                          strerror(errno));
        ioreq->blkdev->cnt_map -= ioreq->v.niov;
        ioreq->pages = NULL;
    } else {
        for (i = 0; i < ioreq->v.niov; i++) {
            if (!ioreq->page[i])
                continue;
            if (xc_gnttab_munmap(gnt, ioreq->page[i], 1) != 0)
                xen_be_printf(&ioreq->blkdev->xendev, 0, "xc_gnttab_munmap failed: %s\n",
                              strerror(errno));
            ioreq->blkdev->cnt_map--;
            ioreq->page[i] = NULL;
        }
    }
}

static int ioreq_map(struct ioreq *ioreq)
{
    int gnt = ioreq->blkdev->xendev.gnttabdev;
    int i;

    if (ioreq->v.niov == 0)
        return 0;
    if (batch_maps) {
        ioreq->pages = xc_gnttab_map_grant_refs
            (gnt, ioreq->v.niov, ioreq->domids, ioreq->refs, ioreq->prot);
        if (ioreq->pages == NULL) {
            xen_be_printf(&ioreq->blkdev->xendev, 0,
                          "can't map %d grant refs (%s, %d maps)\n",
                          ioreq->v.niov, strerror(errno), ioreq->blkdev->cnt_map);
            return -1;
        }
        for (i = 0; i < ioreq->v.niov; i++)
            ioreq->v.iov[i].iov_base = ioreq->pages + i * XC_PAGE_SIZE +
                (uintptr_t)ioreq->v.iov[i].iov_base;
        ioreq->blkdev->cnt_map += ioreq->v.niov;
    } else {
        for (i = 0; i < ioreq->v.niov; i++) {
            ioreq->page[i] = xc_gnttab_map_grant_ref
                (gnt, ioreq->domids[i], ioreq->refs[i], ioreq->prot);
            if (ioreq->page[i] == NULL) {
                xen_be_printf(&ioreq->blkdev->xendev, 0,
                              "can't map grant ref %d (%s, %d maps)\n",
                              ioreq->refs[i], strerror(errno), ioreq->blkdev->cnt_map);
                ioreq_unmap(ioreq);
                return -1;
            }
            ioreq->v.iov[i].iov_base = ioreq->page[i] + (uintptr_t)ioreq->v.iov[i].iov_base;
            ioreq->blkdev->cnt_map++;
        }
    }
    return 0;
}

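/*
 * Synchronous I/O path (use_aio == 0): map the grants, then issue one
 * blocking bdrv_read()/bdrv_write() per iovec segment, flushing before
 * and/or after as requested by presync/postsync.
 */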
static int ioreq_runio_qemu_sync(struct ioreq *ioreq)
{
    struct XenBlkDev *blkdev = ioreq->blkdev;
    int i, rc, len = 0;
    off_t pos;

    if (ioreq_map(ioreq) == -1)
        goto err;
    if (ioreq->presync)
        bdrv_flush(blkdev->bs);

    switch (ioreq->req.operation) {
    case BLKIF_OP_READ:
        pos = ioreq->start;
        for (i = 0; i < ioreq->v.niov; i++) {
            rc = bdrv_read(blkdev->bs, pos / BLOCK_SIZE,
                           ioreq->v.iov[i].iov_base,
                           ioreq->v.iov[i].iov_len / BLOCK_SIZE);
            if (rc != 0) {
                xen_be_printf(&blkdev->xendev, 0, "rd I/O error (%p, len %zd)\n",
                              ioreq->v.iov[i].iov_base,
                              ioreq->v.iov[i].iov_len);
                goto err;
            }
            len += ioreq->v.iov[i].iov_len;
            pos += ioreq->v.iov[i].iov_len;
        }
        break;
    case BLKIF_OP_WRITE:
    case BLKIF_OP_WRITE_BARRIER:
        pos = ioreq->start;
        for (i = 0; i < ioreq->v.niov; i++) {
            rc = bdrv_write(blkdev->bs, pos / BLOCK_SIZE,
                            ioreq->v.iov[i].iov_base,
                            ioreq->v.iov[i].iov_len / BLOCK_SIZE);
            if (rc != 0) {
                xen_be_printf(&blkdev->xendev, 0, "wr I/O error (%p, len %zd)\n",
                              ioreq->v.iov[i].iov_base,
                              ioreq->v.iov[i].iov_len);
                goto err;
            }
            len += ioreq->v.iov[i].iov_len;
            pos += ioreq->v.iov[i].iov_len;
        }
        break;
    default:
        /* unknown operation (shouldn't happen -- parse catches this) */
        goto err;
    }

    if (ioreq->postsync)
        bdrv_flush(blkdev->bs);
    ioreq->status = BLKIF_RSP_OKAY;

    ioreq_unmap(ioreq);
    ioreq_finish(ioreq);
    return 0;

err:
    ioreq->status = BLKIF_RSP_ERROR;
    return -1;
}

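/*
 * Asynchronous I/O path (use_aio == 1): the whole iovec is submitted
 * with a single bdrv_aio_readv()/bdrv_aio_writev() call.  aio_inflight
 * counts the submission itself plus each pending aio callback; the
 * final qemu_aio_complete() drops it to zero, sets the response status,
 * unmaps the grants and reschedules the bottom half.
 */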
static void qemu_aio_complete(void *opaque, int ret)
{
    struct ioreq *ioreq = opaque;

    if (ret != 0) {
        xen_be_printf(&ioreq->blkdev->xendev, 0, "%s I/O error\n",
                      ioreq->req.operation == BLKIF_OP_READ ? "read" : "write");
        ioreq->aio_errors++;
    }

    ioreq->aio_inflight--;
    if (ioreq->aio_inflight > 0)
        return;

    ioreq->status = ioreq->aio_errors ? BLKIF_RSP_ERROR : BLKIF_RSP_OKAY;
    ioreq_unmap(ioreq);
    ioreq_finish(ioreq);
    qemu_bh_schedule(ioreq->blkdev->bh);
}

static int ioreq_runio_qemu_aio(struct ioreq *ioreq)
{
    struct XenBlkDev *blkdev = ioreq->blkdev;

    if (ioreq_map(ioreq) == -1)
        goto err;

    ioreq->aio_inflight++;
    if (ioreq->presync)
        bdrv_flush(blkdev->bs); /* FIXME: aio_flush() ??? */

    switch (ioreq->req.operation) {
    case BLKIF_OP_READ:
        ioreq->aio_inflight++;
        bdrv_aio_readv(blkdev->bs, ioreq->start / BLOCK_SIZE,
                       &ioreq->v, ioreq->v.size / BLOCK_SIZE,
                       qemu_aio_complete, ioreq);
        break;
    case BLKIF_OP_WRITE:
    case BLKIF_OP_WRITE_BARRIER:
        ioreq->aio_inflight++;
        bdrv_aio_writev(blkdev->bs, ioreq->start / BLOCK_SIZE,
                        &ioreq->v, ioreq->v.size / BLOCK_SIZE,
                        qemu_aio_complete, ioreq);
        break;
    default:
        /* unknown operation (shouldn't happen -- parse catches this) */
        goto err;
    }

    if (ioreq->postsync)
        bdrv_flush(blkdev->bs); /* FIXME: aio_flush() ??? */
    qemu_aio_complete(ioreq, 0);

    return 0;

err:
    ioreq->status = BLKIF_RSP_ERROR;
    return -1;
}

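/*
 * Build a blkif response for a finished request and place it on the
 * response ring in the layout matching the negotiated protocol (native,
 * x86_32 or x86_64).  Returns nonzero if the frontend needs an event
 * channel notification; also records whether more requests are pending
 * so the caller can keep the bottom half running.
 */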
static int blk_send_response_one(struct ioreq *ioreq)
{
    struct XenBlkDev  *blkdev = ioreq->blkdev;
    int               send_notify   = 0;
    int               have_requests = 0;
    blkif_response_t  resp;
    void              *dst;

    resp.id        = ioreq->req.id;
    resp.operation = ioreq->req.operation;
    resp.status    = ioreq->status;

    /* Place on the response ring for the relevant domain. */
    switch (blkdev->protocol) {
    case BLKIF_PROTOCOL_NATIVE:
        dst = RING_GET_RESPONSE(&blkdev->rings.native, blkdev->rings.native.rsp_prod_pvt);
        break;
    case BLKIF_PROTOCOL_X86_32:
        dst = RING_GET_RESPONSE(&blkdev->rings.x86_32_part,
                                blkdev->rings.x86_32_part.rsp_prod_pvt);
        break;
    case BLKIF_PROTOCOL_X86_64:
        dst = RING_GET_RESPONSE(&blkdev->rings.x86_64_part,
                                blkdev->rings.x86_64_part.rsp_prod_pvt);
        break;
    default:
        dst = NULL;
    }
    memcpy(dst, &resp, sizeof(resp));
    blkdev->rings.common.rsp_prod_pvt++;

    RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blkdev->rings.common, send_notify);
    if (blkdev->rings.common.rsp_prod_pvt == blkdev->rings.common.req_cons) {
        /*
         * Tail check for pending requests. Allows frontend to avoid
         * notifications if requests are already in flight (lower
         * overheads and promotes batching).
         */
        RING_FINAL_CHECK_FOR_REQUESTS(&blkdev->rings.common, have_requests);
    } else if (RING_HAS_UNCONSUMED_REQUESTS(&blkdev->rings.common)) {
        have_requests = 1;
    }

    if (have_requests)
        blkdev->more_work++;
    return send_notify;
}

/* walk finished list, send outstanding responses, free requests */
static void blk_send_response_all(struct XenBlkDev *blkdev)
{
    struct ioreq *ioreq;
    int send_notify = 0;

    while (!QLIST_EMPTY(&blkdev->finished)) {
        ioreq = QLIST_FIRST(&blkdev->finished);
        send_notify += blk_send_response_one(ioreq);
        ioreq_release(ioreq);
    }
    if (send_notify)
        xen_be_send_notify(&blkdev->xendev);
}

static int blk_get_request(struct XenBlkDev *blkdev, struct ioreq *ioreq, RING_IDX rc)
{
    switch (blkdev->protocol) {
    case BLKIF_PROTOCOL_NATIVE:
        memcpy(&ioreq->req, RING_GET_REQUEST(&blkdev->rings.native, rc),
               sizeof(ioreq->req));
        break;
    case BLKIF_PROTOCOL_X86_32:
        blkif_get_x86_32_req(&ioreq->req,
                             RING_GET_REQUEST(&blkdev->rings.x86_32_part, rc));
        break;
    case BLKIF_PROTOCOL_X86_64:
        blkif_get_x86_64_req(&ioreq->req,
                             RING_GET_REQUEST(&blkdev->rings.x86_64_part, rc));
        break;
    }
    return 0;
}

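/*
 * Main request loop, driven from the bottom half: consume requests from
 * the shared ring, parse them and submit the I/O (aio or sync).  If the
 * ioreq pool runs dry, or responses left more work pending, reschedule
 * the bottom half instead of busy-looping.
 */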
static void blk_handle_requests(struct XenBlkDev *blkdev)
{
    RING_IDX rc, rp;
    struct ioreq *ioreq;

    blkdev->more_work = 0;

    rc = blkdev->rings.common.req_cons;
    rp = blkdev->rings.common.sring->req_prod;
    xen_rmb(); /* Ensure we see queued requests up to 'rp'. */

    if (use_aio)
        blk_send_response_all(blkdev);
    while (rc != rp) {
        /* pull request from ring */
        if (RING_REQUEST_CONS_OVERFLOW(&blkdev->rings.common, rc))
            break;
        ioreq = ioreq_start(blkdev);
        if (ioreq == NULL) {
            blkdev->more_work++;
            break;
        }
        blk_get_request(blkdev, ioreq, rc);
        blkdev->rings.common.req_cons = ++rc;

        /* parse them */
        if (ioreq_parse(ioreq) != 0) {
            if (blk_send_response_one(ioreq))
                xen_be_send_notify(&blkdev->xendev);
            ioreq_release(ioreq);
            continue;
        }

        if (use_aio) {
            /* run i/o in aio mode */
            ioreq_runio_qemu_aio(ioreq);
        } else {
            /* run i/o in sync mode */
            ioreq_runio_qemu_sync(ioreq);
        }
    }
    if (!use_aio)
        blk_send_response_all(blkdev);

    if (blkdev->more_work && blkdev->requests_inflight < max_requests)
        qemu_bh_schedule(blkdev->bh);
}

/* ------------------------------------------------------------- */

static void blk_bh(void *opaque)
{
    struct XenBlkDev *blkdev = opaque;
    blk_handle_requests(blkdev);
}

static void blk_alloc(struct XenDevice *xendev)
{
    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);

    QLIST_INIT(&blkdev->inflight);
    QLIST_INIT(&blkdev->finished);
    QLIST_INIT(&blkdev->freelist);
    blkdev->bh = qemu_bh_new(blk_bh, blkdev);
    if (xen_mode != XEN_EMULATE)
        batch_maps = 1;
}

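/*
 * blk_init: read the backend configuration from xenstore (params, mode,
 * type, dev, device-type), open or look up the qemu block driver, and
 * publish disk geometry and features (feature-barrier, info,
 * sector-size, sectors) back to xenstore for the frontend.
 */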
static int blk_init(struct XenDevice *xendev)
{
    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);
    int index, qflags, have_barriers, info = 0;
    char *h;

    /* read xenstore entries */
    if (blkdev->params == NULL) {
        blkdev->params = xenstore_read_be_str(&blkdev->xendev, "params");
        h = strchr(blkdev->params, ':');
        if (h != NULL) {
            blkdev->fileproto = blkdev->params;
            blkdev->filename  = h+1;
            *h = 0;
        } else {
            blkdev->fileproto = "<unset>";
            blkdev->filename  = blkdev->params;
        }
    }
    if (blkdev->mode == NULL)
        blkdev->mode = xenstore_read_be_str(&blkdev->xendev, "mode");
    if (blkdev->type == NULL)
        blkdev->type = xenstore_read_be_str(&blkdev->xendev, "type");
    if (blkdev->dev == NULL)
        blkdev->dev = xenstore_read_be_str(&blkdev->xendev, "dev");
    if (blkdev->devtype == NULL)
        blkdev->devtype = xenstore_read_be_str(&blkdev->xendev, "device-type");

    /* do we have all we need? */
    if (blkdev->params == NULL ||
        blkdev->mode == NULL   ||
        blkdev->type == NULL   ||
        blkdev->dev == NULL)
        return -1;

    /* read-only ? */
    if (strcmp(blkdev->mode, "w") == 0) {
        qflags = BDRV_O_RDWR;
    } else {
        qflags = 0;
        info  |= VDISK_READONLY;
    }

    /* cdrom ? */
    if (blkdev->devtype && !strcmp(blkdev->devtype, "cdrom"))
        info  |= VDISK_CDROM;

    /* init qemu block driver */
    index = (blkdev->xendev.dev - 202 * 256) / 16;
    blkdev->dinfo = drive_get(IF_XEN, 0, index);
    if (!blkdev->dinfo) {
        /* setup via xenbus -> create new block driver instance */
        xen_be_printf(&blkdev->xendev, 2, "create new bdrv (xenbus setup)\n");
        blkdev->bs = bdrv_new(blkdev->dev);
        if (blkdev->bs) {
            if (bdrv_open(blkdev->bs, blkdev->filename, qflags,
                          bdrv_find_whitelisted_format(blkdev->fileproto))
                != 0) {
                bdrv_delete(blkdev->bs);
                blkdev->bs = NULL;
            }
        }
        if (!blkdev->bs)
            return -1;
    } else {
        /* setup via qemu cmdline -> already setup for us */
        xen_be_printf(&blkdev->xendev, 2, "get configured bdrv (cmdline setup)\n");
        blkdev->bs = blkdev->dinfo->bdrv;
    }
    blkdev->file_blk  = BLOCK_SIZE;
    blkdev->file_size = bdrv_getlength(blkdev->bs);
    if (blkdev->file_size < 0) {
        xen_be_printf(&blkdev->xendev, 1, "bdrv_getlength: %d (%s) | drv %s\n",
                      (int)blkdev->file_size, strerror(-blkdev->file_size),
                      blkdev->bs->drv ? blkdev->bs->drv->format_name : "-");
        blkdev->file_size = 0;
    }
    have_barriers = blkdev->bs->drv && blkdev->bs->drv->bdrv_flush ? 1 : 0;

    xen_be_printf(xendev, 1, "type \"%s\", fileproto \"%s\", filename \"%s\","
                  " size %" PRId64 " (%" PRId64 " MB)\n",
                  blkdev->type, blkdev->fileproto, blkdev->filename,
                  blkdev->file_size, blkdev->file_size >> 20);

    /* fill info */
    xenstore_write_be_int(&blkdev->xendev, "feature-barrier", have_barriers);
    xenstore_write_be_int(&blkdev->xendev, "info",            info);
    xenstore_write_be_int(&blkdev->xendev, "sector-size",     blkdev->file_blk);
    xenstore_write_be_int(&blkdev->xendev, "sectors",
                          blkdev->file_size / blkdev->file_blk);
    return 0;
}

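/*
 * blk_connect: read ring-ref and event-channel from the frontend,
 * select the ring protocol, map the shared ring page via the grant
 * table and bind the event channel.
 */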
static int blk_connect(struct XenDevice *xendev)
{
    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);

    if (xenstore_read_fe_int(&blkdev->xendev, "ring-ref", &blkdev->ring_ref) == -1)
        return -1;
    if (xenstore_read_fe_int(&blkdev->xendev, "event-channel",
                             &blkdev->xendev.remote_port) == -1)
        return -1;

    blkdev->protocol = BLKIF_PROTOCOL_NATIVE;
    if (blkdev->xendev.protocol) {
        if (strcmp(blkdev->xendev.protocol, XEN_IO_PROTO_ABI_X86_32) == 0)
            blkdev->protocol = BLKIF_PROTOCOL_X86_32;
        if (strcmp(blkdev->xendev.protocol, XEN_IO_PROTO_ABI_X86_64) == 0)
            blkdev->protocol = BLKIF_PROTOCOL_X86_64;
    }

    blkdev->sring = xc_gnttab_map_grant_ref(blkdev->xendev.gnttabdev,
                                            blkdev->xendev.dom,
                                            blkdev->ring_ref,
                                            PROT_READ | PROT_WRITE);
    if (!blkdev->sring)
        return -1;
    blkdev->cnt_map++;

    switch (blkdev->protocol) {
    case BLKIF_PROTOCOL_NATIVE:
    {
        blkif_sring_t *sring_native = blkdev->sring;
        BACK_RING_INIT(&blkdev->rings.native, sring_native, XC_PAGE_SIZE);
        break;
    }
    case BLKIF_PROTOCOL_X86_32:
    {
        blkif_x86_32_sring_t *sring_x86_32 = blkdev->sring;

        BACK_RING_INIT(&blkdev->rings.x86_32_part, sring_x86_32, XC_PAGE_SIZE);
        break;
    }
    case BLKIF_PROTOCOL_X86_64:
    {
        blkif_x86_64_sring_t *sring_x86_64 = blkdev->sring;

        BACK_RING_INIT(&blkdev->rings.x86_64_part, sring_x86_64, XC_PAGE_SIZE);
        break;
    }
    }

    xen_be_bind_evtchn(&blkdev->xendev);

    xen_be_printf(&blkdev->xendev, 1, "ok: proto %s, ring-ref %d, "
                  "remote port %d, local port %d\n",
                  blkdev->xendev.protocol, blkdev->ring_ref,
                  blkdev->xendev.remote_port, blkdev->xendev.local_port);
    return 0;
}

static void blk_disconnect(struct XenDevice *xendev)
{
    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);

    if (blkdev->bs) {
        if (!blkdev->dinfo) {
            /* close/delete only if we created it ourselves */
            bdrv_close(blkdev->bs);
            bdrv_delete(blkdev->bs);
        }
        blkdev->bs = NULL;
    }
    xen_be_unbind_evtchn(&blkdev->xendev);

    if (blkdev->sring) {
        xc_gnttab_munmap(blkdev->xendev.gnttabdev, blkdev->sring, 1);
        blkdev->cnt_map--;
        blkdev->sring = NULL;
    }
}

static int blk_free(struct XenDevice *xendev)
{
    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);
    struct ioreq *ioreq;

    while (!QLIST_EMPTY(&blkdev->freelist)) {
        ioreq = QLIST_FIRST(&blkdev->freelist);
        QLIST_REMOVE(ioreq, list);
        qemu_iovec_destroy(&ioreq->v);
        qemu_free(ioreq);
    }

    qemu_free(blkdev->params);
    qemu_free(blkdev->mode);
    qemu_free(blkdev->type);
    qemu_free(blkdev->dev);
    qemu_free(blkdev->devtype);
    qemu_bh_delete(blkdev->bh);
    return 0;
}

static void blk_event(struct XenDevice *xendev)
{
    struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);

    qemu_bh_schedule(blkdev->bh);
}

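/*
 * Backend callbacks registered with the generic xen backend core;
 * DEVOPS_FLAG_NEED_GNTDEV requests a grant-table device handle so the
 * ring and request pages can be mapped.
 */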
struct XenDevOps xen_blkdev_ops = {
    .size       = sizeof(struct XenBlkDev),
    .flags      = DEVOPS_FLAG_NEED_GNTDEV,
    .alloc      = blk_alloc,
    .init       = blk_init,
    .connect    = blk_connect,
    .disconnect = blk_disconnect,
    .event      = blk_event,
    .free       = blk_free,
};
