Annotation of qemu/block/qed.h, revision 1.1.1.2

1.1       root        1: /*
                      2:  * QEMU Enhanced Disk Format
                      3:  *
                      4:  * Copyright IBM, Corp. 2010
                      5:  *
                      6:  * Authors:
                      7:  *  Stefan Hajnoczi   <[email protected]>
                      8:  *  Anthony Liguori   <[email protected]>
                      9:  *
                     10:  * This work is licensed under the terms of the GNU LGPL, version 2 or later.
                     11:  * See the COPYING.LIB file in the top-level directory.
                     12:  *
                     13:  */
                     14: 
                     15: #ifndef BLOCK_QED_H
                     16: #define BLOCK_QED_H
                     17: 
                     18: #include "block_int.h"
                     19: 
                     20: /* The layout of a QED file is as follows:
                     21:  *
                     22:  * +--------+----------+----------+----------+-----+
                     23:  * | header | L1 table | cluster0 | cluster1 | ... |
                     24:  * +--------+----------+----------+----------+-----+
                     25:  *
                     26:  * There is a 2-level pagetable for cluster allocation:
                     27:  *
                     28:  *                     +----------+
                     29:  *                     | L1 table |
                     30:  *                     +----------+
                     31:  *                ,------'  |  '------.
                     32:  *           +----------+   |    +----------+
                     33:  *           | L2 table |  ...   | L2 table |
                     34:  *           +----------+        +----------+
                     35:  *       ,------'  |  '------.
                     36:  *  +----------+   |    +----------+
                     37:  *  |   Data   |  ...   |   Data   |
                     38:  *  +----------+        +----------+
                     39:  *
                     40:  * The L1 table is fixed size and always present.  L2 tables are allocated on
                     41:  * demand.  The L1 table size determines the maximum possible image size; it
                     42:  * can be influenced using the cluster_size and table_size values.
                     43:  *
                     44:  * All fields are little-endian on disk.
                     45:  */
                     46: 
                     47: enum {
                     48:     QED_MAGIC = 'Q' | 'E' << 8 | 'D' << 16 | '\0' << 24,
                     49: 
                     50:     /* The image supports a backing file */
                     51:     QED_F_BACKING_FILE = 0x01,
                     52: 
                     53:     /* The image needs a consistency check before use */
                     54:     QED_F_NEED_CHECK = 0x02,
                     55: 
                     56:     /* The backing file format must not be probed, treat as raw image */
                     57:     QED_F_BACKING_FORMAT_NO_PROBE = 0x04,
                     58: 
                     59:     /* Feature bits must be used when the on-disk format changes */
                     60:     QED_FEATURE_MASK = QED_F_BACKING_FILE | /* supported feature bits */
                     61:                        QED_F_NEED_CHECK |
                     62:                        QED_F_BACKING_FORMAT_NO_PROBE,
                     63:     QED_COMPAT_FEATURE_MASK = 0,            /* supported compat feature bits */
                     64:     QED_AUTOCLEAR_FEATURE_MASK = 0,         /* supported autoclear feature bits */
                     65: 
                     66:     /* Data is stored in groups of sectors called clusters.  Cluster size must
                     67:      * be large to avoid keeping too much metadata.  I/O requests that have
                     68:      * sub-cluster size will require read-modify-write.
                     69:      */
                     70:     QED_MIN_CLUSTER_SIZE = 4 * 1024, /* in bytes */
                     71:     QED_MAX_CLUSTER_SIZE = 64 * 1024 * 1024,
                     72:     QED_DEFAULT_CLUSTER_SIZE = 64 * 1024,
                     73: 
                     74:     /* Allocated clusters are tracked using a 2-level pagetable.  Table size is
                     75:      * a multiple of clusters so large maximum image sizes can be supported
                     76:      * without jacking up the cluster size too much.
                     77:      */
                     78:     QED_MIN_TABLE_SIZE = 1,        /* in clusters */
                     79:     QED_MAX_TABLE_SIZE = 16,
                     80:     QED_DEFAULT_TABLE_SIZE = 4,
                     81: };
                     82: 
                     83: typedef struct {
                     84:     uint32_t magic;                 /* QED\0 */
                     85: 
                     86:     uint32_t cluster_size;          /* in bytes */
                     87:     uint32_t table_size;            /* for L1 and L2 tables, in clusters */
                     88:     uint32_t header_size;           /* in clusters */
                     89: 
                     90:     uint64_t features;              /* format feature bits */
                     91:     uint64_t compat_features;       /* compatible feature bits */
                     92:     uint64_t autoclear_features;    /* self-resetting feature bits */
                     93: 
                     94:     uint64_t l1_table_offset;       /* in bytes */
                     95:     uint64_t image_size;            /* total logical image size, in bytes */
                     96: 
                     97:     /* if (features & QED_F_BACKING_FILE) */
                     98:     uint32_t backing_filename_offset; /* in bytes from start of header */
                     99:     uint32_t backing_filename_size;   /* in bytes */
                    100: } QEDHeader;
                    101: 
                    102: typedef struct {
                    103:     uint64_t offsets[0];            /* in bytes */
                    104: } QEDTable;
                    105: 
                    106: /* The L2 cache is a simple write-through cache for L2 structures */
                    107: typedef struct CachedL2Table {
                    108:     QEDTable *table;
                    109:     uint64_t offset;    /* offset=0 indicates an invalidate entry */
                    110:     QTAILQ_ENTRY(CachedL2Table) node;
                    111:     int ref;
                    112: } CachedL2Table;
                    113: 
                    114: typedef struct {
                    115:     QTAILQ_HEAD(, CachedL2Table) entries;
                    116:     unsigned int n_entries;
                    117: } L2TableCache;
                    118: 
                    119: typedef struct QEDRequest {
                    120:     CachedL2Table *l2_table;
                    121: } QEDRequest;
                    122: 
                    123: typedef struct QEDAIOCB {
                    124:     BlockDriverAIOCB common;
                    125:     QEMUBH *bh;
                    126:     int bh_ret;                     /* final return status for completion bh */
                    127:     QSIMPLEQ_ENTRY(QEDAIOCB) next;  /* next request */
                    128:     bool is_write;                  /* false - read, true - write */
                    129:     bool *finished;                 /* signal for cancel completion */
                    130:     uint64_t end_pos;               /* request end on block device, in bytes */
                    131: 
                    132:     /* User scatter-gather list */
                    133:     QEMUIOVector *qiov;
                    134:     size_t qiov_offset;             /* byte count already processed */
                    135: 
                    136:     /* Current cluster scatter-gather list */
                    137:     QEMUIOVector cur_qiov;
                    138:     uint64_t cur_pos;               /* position on block device, in bytes */
                    139:     uint64_t cur_cluster;           /* cluster offset in image file */
                    140:     unsigned int cur_nclusters;     /* number of clusters being accessed */
                    141:     int find_cluster_ret;           /* used for L1/L2 update */
                    142: 
                    143:     QEDRequest request;
                    144: } QEDAIOCB;
                    145: 
                    146: typedef struct {
                    147:     BlockDriverState *bs;           /* device */
                    148:     uint64_t file_size;             /* length of image file, in bytes */
                    149: 
                    150:     QEDHeader header;               /* always cpu-endian */
                    151:     QEDTable *l1_table;
                    152:     L2TableCache l2_cache;          /* l2 table cache */
                    153:     uint32_t table_nelems;
                    154:     uint32_t l1_shift;
                    155:     uint32_t l2_shift;
                    156:     uint32_t l2_mask;
                    157: 
                    158:     /* Allocating write request queue */
                    159:     QSIMPLEQ_HEAD(, QEDAIOCB) allocating_write_reqs;
                    160: } BDRVQEDState;
                    161: 
                    162: enum {
                    163:     QED_CLUSTER_FOUND,         /* cluster found */
                    164:     QED_CLUSTER_L2,            /* cluster missing in L2 */
                    165:     QED_CLUSTER_L1,            /* cluster missing in L1 */
                    166: };
                    167: 
                    168: /**
                    169:  * qed_find_cluster() completion callback
                    170:  *
                    171:  * @opaque:     User data for completion callback
                    172:  * @ret:        QED_CLUSTER_FOUND   Success
                    173:  *              QED_CLUSTER_L2      Data cluster unallocated in L2
                    174:  *              QED_CLUSTER_L1      L2 unallocated in L1
                    175:  *              -errno              POSIX error occurred
                    176:  * @offset:     Data cluster offset
                    177:  * @len:        Contiguous bytes starting from cluster offset
                    178:  *
                    179:  * This function is invoked when qed_find_cluster() completes.
                    180:  *
                    181:  * On success ret is QED_CLUSTER_FOUND and offset/len are a contiguous range
                    182:  * in the image file.
                    183:  *
                    184:  * On failure ret is QED_CLUSTER_L2 or QED_CLUSTER_L1 for missing L2 or L1
                    185:  * table offset, respectively.  len is number of contiguous unallocated bytes.
                    186:  */
                    187: typedef void QEDFindClusterFunc(void *opaque, int ret, uint64_t offset, size_t len);
                    188: 
                    189: /**
                    190:  * Generic callback for chaining async callbacks
                    191:  */
                    192: typedef struct {
                    193:     BlockDriverCompletionFunc *cb;
                    194:     void *opaque;
                    195: } GenericCB;
                    196: 
                    197: void *gencb_alloc(size_t len, BlockDriverCompletionFunc *cb, void *opaque);
                    198: void gencb_complete(void *opaque, int ret);
                    199: 
                    200: /**
                    201:  * L2 cache functions
                    202:  */
                    203: void qed_init_l2_cache(L2TableCache *l2_cache);
                    204: void qed_free_l2_cache(L2TableCache *l2_cache);
                    205: CachedL2Table *qed_alloc_l2_cache_entry(L2TableCache *l2_cache);
                    206: void qed_unref_l2_cache_entry(CachedL2Table *entry);
                    207: CachedL2Table *qed_find_l2_cache_entry(L2TableCache *l2_cache, uint64_t offset);
                    208: void qed_commit_l2_cache_entry(L2TableCache *l2_cache, CachedL2Table *l2_table);
                    209: 
                    210: /**
                    211:  * Table I/O functions
                    212:  */
                    213: int qed_read_l1_table_sync(BDRVQEDState *s);
                    214: void qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n,
                    215:                         BlockDriverCompletionFunc *cb, void *opaque);
                    216: int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
                    217:                             unsigned int n);
                    218: int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
                    219:                            uint64_t offset);
                    220: void qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset,
                    221:                        BlockDriverCompletionFunc *cb, void *opaque);
                    222: void qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
                    223:                         unsigned int index, unsigned int n, bool flush,
                    224:                         BlockDriverCompletionFunc *cb, void *opaque);
                    225: int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
                    226:                             unsigned int index, unsigned int n, bool flush);
                    227: 
                    228: /**
                    229:  * Cluster functions
                    230:  */
                    231: void qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos,
                    232:                       size_t len, QEDFindClusterFunc *cb, void *opaque);
                    233: 
                    234: /**
                    235:  * Consistency check
                    236:  */
                    237: int qed_check(BDRVQEDState *s, BdrvCheckResult *result, bool fix);
                    238: 
                    239: QEDTable *qed_alloc_table(BDRVQEDState *s);
                    240: 
                    241: /**
                    242:  * Round down to the start of a cluster
                    243:  */
                    244: static inline uint64_t qed_start_of_cluster(BDRVQEDState *s, uint64_t offset)
                    245: {
                    246:     return offset & ~(uint64_t)(s->header.cluster_size - 1);
                    247: }
                    248: 
                    249: static inline uint64_t qed_offset_into_cluster(BDRVQEDState *s, uint64_t offset)
                    250: {
                    251:     return offset & (s->header.cluster_size - 1);
                    252: }
                    253: 
1.1.1.2 ! root      254: static inline uint64_t qed_bytes_to_clusters(BDRVQEDState *s, uint64_t bytes)
1.1       root      255: {
                    256:     return qed_start_of_cluster(s, bytes + (s->header.cluster_size - 1)) /
                    257:            (s->header.cluster_size - 1);
                    258: }
                    259: 
                    260: static inline unsigned int qed_l1_index(BDRVQEDState *s, uint64_t pos)
                    261: {
                    262:     return pos >> s->l1_shift;
                    263: }
                    264: 
                    265: static inline unsigned int qed_l2_index(BDRVQEDState *s, uint64_t pos)
                    266: {
                    267:     return (pos >> s->l2_shift) & s->l2_mask;
                    268: }
                    269: 
                    270: /**
                    271:  * Test if a cluster offset is valid
                    272:  */
                    273: static inline bool qed_check_cluster_offset(BDRVQEDState *s, uint64_t offset)
                    274: {
                    275:     uint64_t header_size = (uint64_t)s->header.header_size *
                    276:                            s->header.cluster_size;
                    277: 
                    278:     if (offset & (s->header.cluster_size - 1)) {
                    279:         return false;
                    280:     }
                    281:     return offset >= header_size && offset < s->file_size;
                    282: }
                    283: 
                    284: /**
                    285:  * Test if a table offset is valid
                    286:  */
                    287: static inline bool qed_check_table_offset(BDRVQEDState *s, uint64_t offset)
                    288: {
                    289:     uint64_t end_offset = offset + (s->header.table_size - 1) *
                    290:                           s->header.cluster_size;
                    291: 
                    292:     /* Overflow check */
                    293:     if (end_offset <= offset) {
                    294:         return false;
                    295:     }
                    296: 
                    297:     return qed_check_cluster_offset(s, offset) &&
                    298:            qed_check_cluster_offset(s, end_offset);
                    299: }
                    300: 
                    301: #endif /* BLOCK_QED_H */

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.