File:  [Qemu by Fabrice Bellard] / qemu / linux-aio.c
Revision 1.1.1.1 (vendor branch)
Tue Apr 24 17:34:58 2018 UTC by root
Branches: qemu, MAIN
CVS tags: qemu0125, qemu0124, qemu0123, qemu0122, qemu0121, qemu0120, HEAD
qemu 0.12.0

/*
 * Linux native AIO support.
 *
 * Copyright (C) 2009 IBM, Corp.
 * Copyright (C) 2009 Red Hat, Inc.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
#include "qemu-common.h"
#include "qemu-aio.h"
#include "block_int.h"
#include "block/raw-posix-aio.h"

#include <sys/eventfd.h>
#include <libaio.h>

/*
 * Queue size (per-device).
 *
 * XXX: eventually we need to communicate this to the guest and/or make it
 *      tunable by the guest.  If we get more outstanding requests at a time
 *      than this we will get EAGAIN from io_submit which is communicated to
 *      the guest as an I/O error.
 */
#define MAX_EVENTS 128

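/*
 * Per-request state.  The embedded BlockDriverAIOCB must stay the first
 * field so that the BlockDriverAIOCB pointer handed back to callers can be
 * cast back to a struct qemu_laiocb (see laio_cancel).
 */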
struct qemu_laiocb {
    BlockDriverAIOCB common;
    struct qemu_laio_state *ctx;
    struct iocb iocb;
    ssize_t ret;
    size_t nbytes;
    int async_context_id;
    QLIST_ENTRY(qemu_laiocb) node;
};

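/*
 * Per-context state: the kernel AIO context, the eventfd the kernel signals
 * on completion, the number of requests in flight, and the list of completed
 * requests whose callbacks cannot run yet because they belong to a different
 * AsyncContext.
 */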
struct qemu_laio_state {
    io_context_t ctx;
    int efd;
    int count;
    QLIST_HEAD(, qemu_laiocb) completed_reqs;
};

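/*
 * Combine the res and res2 fields of a struct io_event into one value; the
 * cast to ssize_t ensures that negative errno values stored by the kernel
 * come back out as negative numbers.
 */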
static inline ssize_t io_event_ret(struct io_event *ev)
{
    return (ssize_t)(((uint64_t)ev->res2 << 32) | ev->res);
}

/*
 * Completes an AIO request (calls the callback and frees the ACB).
 * Be sure to be in the right AsyncContext before calling this function.
 */
static void qemu_laio_process_completion(struct qemu_laio_state *s,
    struct qemu_laiocb *laiocb)
{
    int ret;

    s->count--;

    ret = laiocb->ret;
    if (ret != -ECANCELED) {
        if (ret == laiocb->nbytes)
            ret = 0;
        else if (ret >= 0)
            ret = -EINVAL;

        laiocb->common.cb(laiocb->common.opaque, ret);
    }

    qemu_aio_release(laiocb);
}

/*
 * Processes all queued AIO requests, i.e. requests that have returned from
 * the OS but whose callback was not called yet.  Requests whose callback
 * cannot be called in the current AsyncContext remain in the queue.
 *
 * Returns 1 if at least one request could be completed, 0 otherwise.
 */
static int qemu_laio_process_requests(void *opaque)
{
    struct qemu_laio_state *s = opaque;
    struct qemu_laiocb *laiocb, *next;
    int res = 0;

    QLIST_FOREACH_SAFE (laiocb, &s->completed_reqs, node, next) {
        if (laiocb->async_context_id == get_async_context_id()) {
            /* Unlink first: processing the completion releases the ACB. */
            QLIST_REMOVE(laiocb, node);
            qemu_laio_process_completion(s, laiocb);
            res = 1;
        }
    }

    return res;
}

/*
 * Puts a request in the completion queue so that its callback is called the
 * next time it is possible.  If we already are in the right AsyncContext,
 * the request is completed immediately instead.
 */
static void qemu_laio_enqueue_completed(struct qemu_laio_state *s,
    struct qemu_laiocb *laiocb)
{
    if (laiocb->async_context_id == get_async_context_id()) {
        qemu_laio_process_completion(s, laiocb);
    } else {
        QLIST_INSERT_HEAD(&s->completed_reqs, laiocb, node);
    }
}

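/*
 * Main-loop read handler for the eventfd.  Each iocb is tagged with
 * io_set_eventfd(), so the kernel adds 1 to the eventfd counter for every
 * completed request; the 8-byte counter value read below is therefore the
 * number of completions that io_getevents() can return without blocking.
 */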
static void qemu_laio_completion_cb(void *opaque)
{
    struct qemu_laio_state *s = opaque;

    while (1) {
        struct io_event events[MAX_EVENTS];
        uint64_t val;
        ssize_t ret;
        struct timespec ts = { 0 };
        int nevents, i;

        do {
            ret = read(s->efd, &val, sizeof(val));
        } while (ret == -1 && errno == EINTR);

        if (ret == -1 && errno == EAGAIN)
            break;

        if (ret != 8)
            break;

        do {
            nevents = io_getevents(s->ctx, val, MAX_EVENTS, events, &ts);
        } while (nevents == -EINTR);

        for (i = 0; i < nevents; i++) {
            struct iocb *iocb = events[i].obj;
            struct qemu_laiocb *laiocb =
                    container_of(iocb, struct qemu_laiocb, iocb);

            laiocb->ret = io_event_ret(&events[i]);
            qemu_laio_enqueue_completed(s, laiocb);
        }
    }
}

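/*
 * Flush callback for qemu_aio_flush(): returns nonzero while this context
 * still has requests in flight.
 */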
static int qemu_laio_flush_cb(void *opaque)
{
    struct qemu_laio_state *s = opaque;

    return (s->count > 0) ? 1 : 0;
}

static void laio_cancel(BlockDriverAIOCB *blockacb)
{
    struct qemu_laiocb *laiocb = (struct qemu_laiocb *)blockacb;
    struct io_event event;
    int ret;

    if (laiocb->ret != -EINPROGRESS)
        return;

    /*
     * Note that as of Linux 2.6.31 neither the block device code nor any
     * filesystem implements cancellation of AIO requests.
     * Thus the polling loop below is the normal code path.
     */
    ret = io_cancel(laiocb->ctx->ctx, &laiocb->iocb, &event);
    if (ret == 0) {
        laiocb->ret = -ECANCELED;
        return;
    }

    /*
     * We have to wait for the iocb to finish.
     *
     * The only way to get the iocb status update is by polling the io context.
     * We might be able to do this slightly more efficiently by removing the
     * O_NONBLOCK flag.
     */
    while (laiocb->ret == -EINPROGRESS)
        qemu_laio_completion_cb(laiocb->ctx);
}

static AIOPool laio_pool = {
    .aiocb_size         = sizeof(struct qemu_laiocb),
    .cancel             = laio_cancel,
};

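/*
 * Submit a single vectored read or write.  A minimal usage sketch, assuming
 * aio_ctx is the pointer previously returned by laio_init() (this is how the
 * raw-posix driver is expected to call it):
 *
 *   acb = laio_submit(bs, aio_ctx, fd, sector_num, qiov, nb_sectors,
 *                     cb, opaque, QEMU_AIO_READ);
 *   if (!acb)
 *       ... fall back to another AIO implementation or report an error ...
 */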
BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque, int type)
{
    struct qemu_laio_state *s = aio_ctx;
    struct qemu_laiocb *laiocb;
    struct iocb *iocbs;
    off_t offset = sector_num * 512;

    laiocb = qemu_aio_get(&laio_pool, bs, cb, opaque);
    if (!laiocb)
        return NULL;
    laiocb->nbytes = nb_sectors * 512;
    laiocb->ctx = s;
    laiocb->ret = -EINPROGRESS;
    laiocb->async_context_id = get_async_context_id();

    iocbs = &laiocb->iocb;

    switch (type) {
    case QEMU_AIO_WRITE:
        io_prep_pwritev(iocbs, fd, qiov->iov, qiov->niov, offset);
        break;
    case QEMU_AIO_READ:
        io_prep_preadv(iocbs, fd, qiov->iov, qiov->niov, offset);
        break;
    default:
        fprintf(stderr, "%s: invalid AIO request type 0x%x.\n",
                        __func__, type);
        goto out_free_aiocb;
    }
    io_set_eventfd(&laiocb->iocb, s->efd);
    s->count++;

    if (io_submit(s->ctx, 1, &iocbs) < 0)
        goto out_dec_count;
    return &laiocb->common;

out_dec_count:
    /* Undo the increment above, then fall through to release the ACB. */
    s->count--;
out_free_aiocb:
    qemu_aio_release(laiocb);
    return NULL;
}

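/*
 * Set up Linux AIO: allocate the state, create a non-blocking eventfd for
 * completion notification, create the kernel AIO context sized for
 * MAX_EVENTS in-flight requests, and register the eventfd with qemu's main
 * loop.  Returns the opaque state pointer, or NULL on failure.
 */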
void *laio_init(void)
{
    struct qemu_laio_state *s;

    s = qemu_mallocz(sizeof(*s));
    QLIST_INIT(&s->completed_reqs);
    s->efd = eventfd(0, 0);
    if (s->efd == -1)
        goto out_free_state;
    fcntl(s->efd, F_SETFL, O_NONBLOCK);

    if (io_setup(MAX_EVENTS, &s->ctx) != 0)
        goto out_close_efd;

    qemu_aio_set_fd_handler(s->efd, qemu_laio_completion_cb, NULL,
        qemu_laio_flush_cb, qemu_laio_process_requests, s);

    return s;

out_close_efd:
    close(s->efd);
out_free_state:
    qemu_free(s);
    return NULL;
}
