Annotation of qemu/linux-aio.c, revision 1.1

1.1     ! root        1: /*
        !             2:  * Linux native AIO support.
        !             3:  *
        !             4:  * Copyright (C) 2009 IBM, Corp.
        !             5:  * Copyright (C) 2009 Red Hat, Inc.
        !             6:  *
        !             7:  * This work is licensed under the terms of the GNU GPL, version 2 or later.
        !             8:  * See the COPYING file in the top-level directory.
        !             9:  */
        !            10: #include "qemu-common.h"
        !            11: #include "qemu-aio.h"
        !            12: #include "block_int.h"
        !            13: #include "block/raw-posix-aio.h"
        !            14: 
        !            15: #include <sys/eventfd.h>
        !            16: #include <libaio.h>
        !            17: 
        !            18: /*
        !            19:  * Queue size (per-device).
        !            20:  *
        !            21:  * XXX: eventually we need to communicate this to the guest and/or make it
        !            22:  *      tunable by the guest.  If we get more outstanding requests at a time
        !            23:  *      than this we will get EAGAIN from io_submit which is communicated to
        !            24:  *      the guest as an I/O error.
        !            25:  */
        !            26: #define MAX_EVENTS 128
        !            27: 
/* Per-request tracking state for one native AIO operation. */
struct qemu_laiocb {
    BlockDriverAIOCB common;        /* generic AIOCB header; kept first so
                                     * laio_cancel() can cast between the
                                     * two types */
    struct qemu_laio_state *ctx;    /* owning per-device AIO state */
    struct iocb iocb;               /* kernel iocb passed to io_submit() */
    ssize_t ret;                    /* -EINPROGRESS while in flight, then
                                     * bytes transferred or negative errno */
    size_t nbytes;                  /* expected transfer length in bytes */
    int async_context_id;           /* AsyncContext the request was issued
                                     * from; completion is deferred until we
                                     * are back in that context */
    QLIST_ENTRY(qemu_laiocb) node;  /* link in qemu_laio_state.completed_reqs */
};
        !            37: 
/* Per-device state for the Linux native AIO backend (one per laio_init()). */
struct qemu_laio_state {
    io_context_t ctx;   /* kernel AIO context from io_setup() */
    int efd;            /* non-blocking eventfd the kernel signals on
                         * request completion */
    int count;          /* number of in-flight requests; polled by
                         * qemu_laio_flush_cb() */
    QLIST_HEAD(, qemu_laiocb) completed_reqs;  /* finished requests whose
                                                * callback is still pending */
};
        !            44: 
        !            45: static inline ssize_t io_event_ret(struct io_event *ev)
        !            46: {
        !            47:     return (ssize_t)(((uint64_t)ev->res2 << 32) | ev->res);
        !            48: }
        !            49: 
        !            50: /*
        !            51:  * Completes an AIO request (calls the callback and frees the ACB).
        !            52:  * Be sure to be in the right AsyncContext before calling this function.
        !            53:  */
        !            54: static void qemu_laio_process_completion(struct qemu_laio_state *s,
        !            55:     struct qemu_laiocb *laiocb)
        !            56: {
        !            57:     int ret;
        !            58: 
        !            59:     s->count--;
        !            60: 
        !            61:     ret = laiocb->ret;
        !            62:     if (ret != -ECANCELED) {
        !            63:         if (ret == laiocb->nbytes)
        !            64:             ret = 0;
        !            65:         else if (ret >= 0)
        !            66:             ret = -EINVAL;
        !            67: 
        !            68:         laiocb->common.cb(laiocb->common.opaque, ret);
        !            69:     }
        !            70: 
        !            71:     qemu_aio_release(laiocb);
        !            72: }
        !            73: 
        !            74: /*
        !            75:  * Processes all queued AIO requests, i.e. requests that have return from OS
        !            76:  * but their callback was not called yet. Requests that cannot have their
        !            77:  * callback called in the current AsyncContext, remain in the queue.
        !            78:  *
        !            79:  * Returns 1 if at least one request could be completed, 0 otherwise.
        !            80:  */
        !            81: static int qemu_laio_process_requests(void *opaque)
        !            82: {
        !            83:     struct qemu_laio_state *s = opaque;
        !            84:     struct qemu_laiocb *laiocb, *next;
        !            85:     int res = 0;
        !            86: 
        !            87:     QLIST_FOREACH_SAFE (laiocb, &s->completed_reqs, node, next) {
        !            88:         if (laiocb->async_context_id == get_async_context_id()) {
        !            89:             qemu_laio_process_completion(s, laiocb);
        !            90:             QLIST_REMOVE(laiocb, node);
        !            91:             res = 1;
        !            92:         }
        !            93:     }
        !            94: 
        !            95:     return res;
        !            96: }
        !            97: 
        !            98: /*
        !            99:  * Puts a request in the completion queue so that its callback is called the
        !           100:  * next time when it's possible. If we already are in the right AsyncContext,
        !           101:  * the request is completed immediately instead.
        !           102:  */
        !           103: static void qemu_laio_enqueue_completed(struct qemu_laio_state *s,
        !           104:     struct qemu_laiocb* laiocb)
        !           105: {
        !           106:     if (laiocb->async_context_id == get_async_context_id()) {
        !           107:         qemu_laio_process_completion(s, laiocb);
        !           108:     } else {
        !           109:         QLIST_INSERT_HEAD(&s->completed_reqs, laiocb, node);
        !           110:     }
        !           111: }
        !           112: 
        !           113: static void qemu_laio_completion_cb(void *opaque)
        !           114: {
        !           115:     struct qemu_laio_state *s = opaque;
        !           116: 
        !           117:     while (1) {
        !           118:         struct io_event events[MAX_EVENTS];
        !           119:         uint64_t val;
        !           120:         ssize_t ret;
        !           121:         struct timespec ts = { 0 };
        !           122:         int nevents, i;
        !           123: 
        !           124:         do {
        !           125:             ret = read(s->efd, &val, sizeof(val));
        !           126:         } while (ret == 1 && errno == EINTR);
        !           127: 
        !           128:         if (ret == -1 && errno == EAGAIN)
        !           129:             break;
        !           130: 
        !           131:         if (ret != 8)
        !           132:             break;
        !           133: 
        !           134:         do {
        !           135:             nevents = io_getevents(s->ctx, val, MAX_EVENTS, events, &ts);
        !           136:         } while (nevents == -EINTR);
        !           137: 
        !           138:         for (i = 0; i < nevents; i++) {
        !           139:             struct iocb *iocb = events[i].obj;
        !           140:             struct qemu_laiocb *laiocb =
        !           141:                     container_of(iocb, struct qemu_laiocb, iocb);
        !           142: 
        !           143:             laiocb->ret = io_event_ret(&events[i]);
        !           144:             qemu_laio_enqueue_completed(s, laiocb);
        !           145:         }
        !           146:     }
        !           147: }
        !           148: 
        !           149: static int qemu_laio_flush_cb(void *opaque)
        !           150: {
        !           151:     struct qemu_laio_state *s = opaque;
        !           152: 
        !           153:     return (s->count > 0) ? 1 : 0;
        !           154: }
        !           155: 
        !           156: static void laio_cancel(BlockDriverAIOCB *blockacb)
        !           157: {
        !           158:     struct qemu_laiocb *laiocb = (struct qemu_laiocb *)blockacb;
        !           159:     struct io_event event;
        !           160:     int ret;
        !           161: 
        !           162:     if (laiocb->ret != -EINPROGRESS)
        !           163:         return;
        !           164: 
        !           165:     /*
        !           166:      * Note that as of Linux 2.6.31 neither the block device code nor any
        !           167:      * filesystem implements cancellation of AIO request.
        !           168:      * Thus the polling loop below is the normal code path.
        !           169:      */
        !           170:     ret = io_cancel(laiocb->ctx->ctx, &laiocb->iocb, &event);
        !           171:     if (ret == 0) {
        !           172:         laiocb->ret = -ECANCELED;
        !           173:         return;
        !           174:     }
        !           175: 
        !           176:     /*
        !           177:      * We have to wait for the iocb to finish.
        !           178:      *
        !           179:      * The only way to get the iocb status update is by polling the io context.
        !           180:      * We might be able to do this slightly more optimal by removing the
        !           181:      * O_NONBLOCK flag.
        !           182:      */
        !           183:     while (laiocb->ret == -EINPROGRESS)
        !           184:         qemu_laio_completion_cb(laiocb->ctx);
        !           185: }
        !           186: 
/* AIOCB pool descriptor for this backend: sizes the per-request allocation
 * and wires up request cancellation. */
static AIOPool laio_pool = {
    .aiocb_size         = sizeof(struct qemu_laiocb),
    .cancel             = laio_cancel,
};
        !           191: 
        !           192: BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
        !           193:         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        !           194:         BlockDriverCompletionFunc *cb, void *opaque, int type)
        !           195: {
        !           196:     struct qemu_laio_state *s = aio_ctx;
        !           197:     struct qemu_laiocb *laiocb;
        !           198:     struct iocb *iocbs;
        !           199:     off_t offset = sector_num * 512;
        !           200: 
        !           201:     laiocb = qemu_aio_get(&laio_pool, bs, cb, opaque);
        !           202:     if (!laiocb)
        !           203:         return NULL;
        !           204:     laiocb->nbytes = nb_sectors * 512;
        !           205:     laiocb->ctx = s;
        !           206:     laiocb->ret = -EINPROGRESS;
        !           207:     laiocb->async_context_id = get_async_context_id();
        !           208: 
        !           209:     iocbs = &laiocb->iocb;
        !           210: 
        !           211:     switch (type) {
        !           212:     case QEMU_AIO_WRITE:
        !           213:         io_prep_pwritev(iocbs, fd, qiov->iov, qiov->niov, offset);
        !           214:        break;
        !           215:     case QEMU_AIO_READ:
        !           216:         io_prep_preadv(iocbs, fd, qiov->iov, qiov->niov, offset);
        !           217:        break;
        !           218:     default:
        !           219:         fprintf(stderr, "%s: invalid AIO request type 0x%x.\n",
        !           220:                         __func__, type);
        !           221:         goto out_free_aiocb;
        !           222:     }
        !           223:     io_set_eventfd(&laiocb->iocb, s->efd);
        !           224:     s->count++;
        !           225: 
        !           226:     if (io_submit(s->ctx, 1, &iocbs) < 0)
        !           227:         goto out_dec_count;
        !           228:     return &laiocb->common;
        !           229: 
        !           230: out_free_aiocb:
        !           231:     qemu_aio_release(laiocb);
        !           232: out_dec_count:
        !           233:     s->count--;
        !           234:     return NULL;
        !           235: }
        !           236: 
        !           237: void *laio_init(void)
        !           238: {
        !           239:     struct qemu_laio_state *s;
        !           240: 
        !           241:     s = qemu_mallocz(sizeof(*s));
        !           242:     QLIST_INIT(&s->completed_reqs);
        !           243:     s->efd = eventfd(0, 0);
        !           244:     if (s->efd == -1)
        !           245:         goto out_free_state;
        !           246:     fcntl(s->efd, F_SETFL, O_NONBLOCK);
        !           247: 
        !           248:     if (io_setup(MAX_EVENTS, &s->ctx) != 0)
        !           249:         goto out_close_efd;
        !           250: 
        !           251:     qemu_aio_set_fd_handler(s->efd, qemu_laio_completion_cb, NULL,
        !           252:         qemu_laio_flush_cb, qemu_laio_process_requests, s);
        !           253: 
        !           254:     return s;
        !           255: 
        !           256: out_close_efd:
        !           257:     close(s->efd);
        !           258: out_free_state:
        !           259:     qemu_free(s);
        !           260:     return NULL;
        !           261: }

unix.superglobalmegacorp.com