Annotation of qemu/block/qed.h, revision 1.1.1.4

1.1       root        1: /*
                      2:  * QEMU Enhanced Disk Format
                      3:  *
                      4:  * Copyright IBM, Corp. 2010
                      5:  *
                      6:  * Authors:
                      7:  *  Stefan Hajnoczi   <[email protected]>
                      8:  *  Anthony Liguori   <[email protected]>
                      9:  *
                     10:  * This work is licensed under the terms of the GNU LGPL, version 2 or later.
                     11:  * See the COPYING.LIB file in the top-level directory.
                     12:  *
                     13:  */
                     14: 
                     15: #ifndef BLOCK_QED_H
                     16: #define BLOCK_QED_H
                     17: 
                     18: #include "block_int.h"
                     19: 
                     20: /* The layout of a QED file is as follows:
                     21:  *
                     22:  * +--------+----------+----------+----------+-----+
                     23:  * | header | L1 table | cluster0 | cluster1 | ... |
                     24:  * +--------+----------+----------+----------+-----+
                     25:  *
                     26:  * There is a 2-level pagetable for cluster allocation:
                     27:  *
                     28:  *                     +----------+
                     29:  *                     | L1 table |
                     30:  *                     +----------+
                     31:  *                ,------'  |  '------.
                     32:  *           +----------+   |    +----------+
                     33:  *           | L2 table |  ...   | L2 table |
                     34:  *           +----------+        +----------+
                     35:  *       ,------'  |  '------.
                     36:  *  +----------+   |    +----------+
                     37:  *  |   Data   |  ...   |   Data   |
                     38:  *  +----------+        +----------+
                     39:  *
                     40:  * The L1 table is fixed size and always present.  L2 tables are allocated on
                     41:  * demand.  The L1 table size determines the maximum possible image size; it
                     42:  * can be influenced using the cluster_size and table_size values.
                     43:  *
                     44:  * All fields are little-endian on disk.
                     45:  */
                     46: 
                     47: enum {
                     48:     QED_MAGIC = 'Q' | 'E' << 8 | 'D' << 16 | '\0' << 24,
                     49: 
                     50:     /* The image supports a backing file */
                     51:     QED_F_BACKING_FILE = 0x01,
                     52: 
                     53:     /* The image needs a consistency check before use */
                     54:     QED_F_NEED_CHECK = 0x02,
                     55: 
                     56:     /* The backing file format must not be probed, treat as raw image */
                     57:     QED_F_BACKING_FORMAT_NO_PROBE = 0x04,
                     58: 
                     59:     /* Feature bits must be used when the on-disk format changes */
                     60:     QED_FEATURE_MASK = QED_F_BACKING_FILE | /* supported feature bits */
                     61:                        QED_F_NEED_CHECK |
                     62:                        QED_F_BACKING_FORMAT_NO_PROBE,
                     63:     QED_COMPAT_FEATURE_MASK = 0,            /* supported compat feature bits */
                     64:     QED_AUTOCLEAR_FEATURE_MASK = 0,         /* supported autoclear feature bits */
                     65: 
                     66:     /* Data is stored in groups of sectors called clusters.  Cluster size must
                     67:      * be large to avoid keeping too much metadata.  I/O requests that have
                     68:      * sub-cluster size will require read-modify-write.
                     69:      */
                     70:     QED_MIN_CLUSTER_SIZE = 4 * 1024, /* in bytes */
                     71:     QED_MAX_CLUSTER_SIZE = 64 * 1024 * 1024,
                     72:     QED_DEFAULT_CLUSTER_SIZE = 64 * 1024,
                     73: 
                     74:     /* Allocated clusters are tracked using a 2-level pagetable.  Table size is
                     75:      * a multiple of clusters so large maximum image sizes can be supported
                     76:      * without jacking up the cluster size too much.
                     77:      */
                     78:     QED_MIN_TABLE_SIZE = 1,        /* in clusters */
                     79:     QED_MAX_TABLE_SIZE = 16,
                     80:     QED_DEFAULT_TABLE_SIZE = 4,
1.1.1.3   root       81: 
                     82:     /* Delay to flush and clean image after last allocating write completes */
                     83:     QED_NEED_CHECK_TIMEOUT = 5,    /* in seconds */
1.1       root       84: };
                     85: 
/*
 * QED image header fields.
 *
 * NOTE(review): presumably this mirrors the header stored at the start of the
 * image file, so field order and widths should not be changed without
 * confirming against the code that reads and writes it.
 */
typedef struct {
    uint32_t magic;                 /* QED\0 */

    uint32_t cluster_size;          /* in bytes */
    uint32_t table_size;            /* for L1 and L2 tables, in clusters */
    uint32_t header_size;           /* in clusters */

    uint64_t features;              /* format feature bits */
    uint64_t compat_features;       /* compatible feature bits */
    uint64_t autoclear_features;    /* self-resetting feature bits */

    uint64_t l1_table_offset;       /* in bytes */
    uint64_t image_size;            /* total logical image size, in bytes */

    /* The two fields below apply if (features & QED_F_BACKING_FILE) */
    uint32_t backing_filename_offset; /* in bytes from start of header */
    uint32_t backing_filename_size;   /* in bytes */
} QEDHeader;
                    104: 
/*
 * An L1 or L2 table: an array of 64-bit offsets.
 *
 * The array is declared with zero length, so sizeof(QEDTable) does not
 * include any entries; the storage for them has to be provided by whoever
 * allocates the table (presumably qed_alloc_table() -- confirm there).
 */
typedef struct {
    uint64_t offsets[0];            /* in bytes */
} QEDTable;
                    108: 
/* The L2 cache is a simple write-through cache for L2 structures.
 *
 * Each CachedL2Table is one cache entry, linked into an L2TableCache list.
 */
typedef struct CachedL2Table {
    QEDTable *table;    /* the cached table contents */
    uint64_t offset;    /* offset=0 indicates an invalidated entry */
    QTAILQ_ENTRY(CachedL2Table) node;   /* list linkage */
    int ref;            /* presumably a reference count; see
                         * qed_unref_l2_cache_entry() -- TODO confirm */
} CachedL2Table;
                    116: 
/* The L2 table cache: a tail queue of CachedL2Table entries plus a count */
typedef struct {
    QTAILQ_HEAD(, CachedL2Table) entries;
    unsigned int n_entries;     /* presumably the number of entries queued in
                                 * 'entries' -- TODO confirm */
} L2TableCache;
                    121: 
/* Per-request context passed to the table I/O and cluster lookup functions */
typedef struct QEDRequest {
    CachedL2Table *l2_table;    /* L2 cache entry associated with the request;
                                 * NOTE(review): who sets and releases it is
                                 * not visible in this header */
} QEDRequest;
                    125: 
/* State of one asynchronous QED read or write request */
typedef struct QEDAIOCB {
    BlockDriverAIOCB common;        /* generic block-layer AIOCB, first member */
    QEMUBH *bh;                     /* bottom half; see bh_ret */
    int bh_ret;                     /* final return status for completion bh */
    QSIMPLEQ_ENTRY(QEDAIOCB) next;  /* next request */
    bool is_write;                  /* false - read, true - write */
    bool *finished;                 /* signal for cancel completion */
    uint64_t end_pos;               /* request end on block device, in bytes */

    /* User scatter-gather list */
    QEMUIOVector *qiov;
    size_t qiov_offset;             /* byte count already processed */

    /* Current cluster scatter-gather list */
    QEMUIOVector cur_qiov;
    uint64_t cur_pos;               /* position on block device, in bytes */
    uint64_t cur_cluster;           /* cluster offset in image file */
    unsigned int cur_nclusters;     /* number of clusters being accessed */
    int find_cluster_ret;           /* used for L1/L2 update */

    QEDRequest request;             /* context handed to table/cluster helpers */
} QEDAIOCB;
                    148: 
/* Driver state for one open QED image */
typedef struct {
    BlockDriverState *bs;           /* device */
    uint64_t file_size;             /* length of image file, in bytes */

    QEDHeader header;               /* always cpu-endian */
    QEDTable *l1_table;
    L2TableCache l2_cache;          /* l2 table cache */
    uint32_t table_nelems;          /* presumably entries per table -- TODO
                                     * confirm where it is initialized */
    uint32_t l1_shift;              /* right shift used by qed_l1_index() */
    uint32_t l2_shift;              /* right shift used by qed_l2_index() */
    uint32_t l2_mask;               /* mask applied by qed_l2_index() */

    /* Allocating write request queue */
    QSIMPLEQ_HEAD(, QEDAIOCB) allocating_write_reqs;
    bool allocating_write_reqs_plugged;

    /* Periodic flush and clear need check flag */
    QEMUTimer *need_check_timer;

    Error *migration_blocker;
} BDRVQEDState;
                    170: 
                    171: enum {
                    172:     QED_CLUSTER_FOUND,         /* cluster found */
1.1.1.3   root      173:     QED_CLUSTER_ZERO,          /* zero cluster found */
1.1       root      174:     QED_CLUSTER_L2,            /* cluster missing in L2 */
                    175:     QED_CLUSTER_L1,            /* cluster missing in L1 */
                    176: };
                    177: 
/**
 * qed_find_cluster() completion callback
 *
 * @opaque:     User data for completion callback
 * @ret:        QED_CLUSTER_FOUND   Success
 *              QED_CLUSTER_ZERO    Zero cluster found
 *              QED_CLUSTER_L2      Data cluster unallocated in L2
 *              QED_CLUSTER_L1      L2 unallocated in L1
 *              -errno              POSIX error occurred
 * @offset:     Data cluster offset
 * @len:        Contiguous bytes starting from cluster offset
 *
 * This function is invoked when qed_find_cluster() completes.
 *
 * On success ret is QED_CLUSTER_FOUND and offset/len are a contiguous range
 * in the image file.
 *
 * On failure ret is QED_CLUSTER_L2 or QED_CLUSTER_L1 for missing L2 or L1
 * table offset, respectively.  len is number of contiguous unallocated bytes.
 *
 * NOTE(review): what offset/len hold when ret is QED_CLUSTER_ZERO is not
 * shown in this header -- confirm against qed_find_cluster().
 */
typedef void QEDFindClusterFunc(void *opaque, int ret, uint64_t offset, size_t len);
                    198: 
/**
 * Generic callback for chaining async callbacks
 *
 * Pairs a completion function with the opaque pointer to hand to it.
 */
typedef struct {
    BlockDriverCompletionFunc *cb;  /* completion function */
    void *opaque;                   /* user data for cb */
} GenericCB;
                    206: 
/*
 * GenericCB helpers (defined outside this header).
 *
 * NOTE(review): presumably gencb_alloc() allocates @len bytes beginning with
 * a GenericCB initialized from @cb/@opaque, and gencb_complete() invokes the
 * stored callback with @ret -- confirm against the implementation.
 */
void *gencb_alloc(size_t len, BlockDriverCompletionFunc *cb, void *opaque);
void gencb_complete(void *opaque, int ret);
                    209: 
/**
 * L2 cache functions
 *
 * These operate on an L2TableCache and its CachedL2Table entries.  Their
 * exact semantics (eviction, reference counting) are defined outside this
 * header; see the implementation before relying on them.
 */
void qed_init_l2_cache(L2TableCache *l2_cache);
void qed_free_l2_cache(L2TableCache *l2_cache);
CachedL2Table *qed_alloc_l2_cache_entry(L2TableCache *l2_cache);
void qed_unref_l2_cache_entry(CachedL2Table *entry);
CachedL2Table *qed_find_l2_cache_entry(L2TableCache *l2_cache, uint64_t offset);
void qed_commit_l2_cache_entry(L2TableCache *l2_cache, CachedL2Table *l2_table);
                    219: 
/**
 * Table I/O functions
 *
 * The functions taking a BlockDriverCompletionFunc report completion through
 * @cb/@opaque; the *_sync variants return an int result directly.
 *
 * NOTE(review): @index and @n presumably select a range of table entries to
 * write, and @flush presumably requests a flush around the write -- confirm
 * against the implementation.
 */
int qed_read_l1_table_sync(BDRVQEDState *s);
void qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n,
                        BlockDriverCompletionFunc *cb, void *opaque);
int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
                            unsigned int n);
int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
                           uint64_t offset);
void qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset,
                       BlockDriverCompletionFunc *cb, void *opaque);
void qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
                        unsigned int index, unsigned int n, bool flush,
                        BlockDriverCompletionFunc *cb, void *opaque);
int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
                            unsigned int index, unsigned int n, bool flush);
                    237: 
/**
 * Cluster functions
 *
 * qed_find_cluster() reports its result through a QEDFindClusterFunc
 * callback, which receives @opaque.
 *
 * NOTE(review): @pos and @len presumably give the byte position and length
 * of the range to look up -- confirm against the implementation.
 */
void qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos,
                      size_t len, QEDFindClusterFunc *cb, void *opaque);
                    243: 
/**
 * Consistency check
 *
 * NOTE(review): @fix presumably selects whether detected problems are
 * repaired, with findings reported through @result -- confirm against the
 * implementation.
 */
int qed_check(BDRVQEDState *s, BdrvCheckResult *result, bool fix);

/* Allocate a table for image @s; sizing and ownership are defined by the
 * implementation (not visible in this header).
 */
QEDTable *qed_alloc_table(BDRVQEDState *s);
                    250: 
                    251: /**
                    252:  * Round down to the start of a cluster
                    253:  */
                    254: static inline uint64_t qed_start_of_cluster(BDRVQEDState *s, uint64_t offset)
                    255: {
                    256:     return offset & ~(uint64_t)(s->header.cluster_size - 1);
                    257: }
                    258: 
                    259: static inline uint64_t qed_offset_into_cluster(BDRVQEDState *s, uint64_t offset)
                    260: {
                    261:     return offset & (s->header.cluster_size - 1);
                    262: }
                    263: 
1.1.1.2   root      264: static inline uint64_t qed_bytes_to_clusters(BDRVQEDState *s, uint64_t bytes)
1.1       root      265: {
                    266:     return qed_start_of_cluster(s, bytes + (s->header.cluster_size - 1)) /
                    267:            (s->header.cluster_size - 1);
                    268: }
                    269: 
                    270: static inline unsigned int qed_l1_index(BDRVQEDState *s, uint64_t pos)
                    271: {
                    272:     return pos >> s->l1_shift;
                    273: }
                    274: 
                    275: static inline unsigned int qed_l2_index(BDRVQEDState *s, uint64_t pos)
                    276: {
                    277:     return (pos >> s->l2_shift) & s->l2_mask;
                    278: }
                    279: 
                    280: /**
                    281:  * Test if a cluster offset is valid
                    282:  */
                    283: static inline bool qed_check_cluster_offset(BDRVQEDState *s, uint64_t offset)
                    284: {
                    285:     uint64_t header_size = (uint64_t)s->header.header_size *
                    286:                            s->header.cluster_size;
                    287: 
                    288:     if (offset & (s->header.cluster_size - 1)) {
                    289:         return false;
                    290:     }
                    291:     return offset >= header_size && offset < s->file_size;
                    292: }
                    293: 
                    294: /**
                    295:  * Test if a table offset is valid
                    296:  */
                    297: static inline bool qed_check_table_offset(BDRVQEDState *s, uint64_t offset)
                    298: {
                    299:     uint64_t end_offset = offset + (s->header.table_size - 1) *
                    300:                           s->header.cluster_size;
                    301: 
                    302:     /* Overflow check */
                    303:     if (end_offset <= offset) {
                    304:         return false;
                    305:     }
                    306: 
                    307:     return qed_check_cluster_offset(s, offset) &&
                    308:            qed_check_cluster_offset(s, end_offset);
                    309: }
                    310: 
1.1.1.3   root      311: static inline bool qed_offset_is_cluster_aligned(BDRVQEDState *s,
                    312:                                                  uint64_t offset)
                    313: {
                    314:     if (qed_offset_into_cluster(s, offset)) {
                    315:         return false;
                    316:     }
                    317:     return true;
                    318: }
                    319: 
                    320: static inline bool qed_offset_is_unalloc_cluster(uint64_t offset)
                    321: {
                    322:     if (offset == 0) {
                    323:         return true;
                    324:     }
                    325:     return false;
                    326: }
                    327: 
                    328: static inline bool qed_offset_is_zero_cluster(uint64_t offset)
                    329: {
                    330:     if (offset == 1) {
                    331:         return true;
                    332:     }
                    333:     return false;
                    334: }
                    335: 
1.1       root      336: #endif /* BLOCK_QED_H */

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.