qemu/block/qed.h - annotate

Return to qed.h CVS log
Up to [Qemu by Fabrice Bellard] / qemu / block
Annotation of qemu/block/qed.h, revision 1.1.1.5

1.1       root        1: /*
                      2:  * QEMU Enhanced Disk Format
                      3:  *
                      4:  * Copyright IBM, Corp. 2010
                      5:  *
                      6:  * Authors:
                      7:  *  Stefan Hajnoczi   <[email protected]>
                      8:  *  Anthony Liguori   <[email protected]>
                      9:  *
                     10:  * This work is licensed under the terms of the GNU LGPL, version 2 or later.
                     11:  * See the COPYING.LIB file in the top-level directory.
                     12:  *
                     13:  */
                     14: 
                     15: #ifndef BLOCK_QED_H
                     16: #define BLOCK_QED_H
                     17: 
                     18: #include "block_int.h"
                     19: 
                     20: /* The layout of a QED file is as follows:
                     21:  *
                     22:  * +--------+----------+----------+----------+-----+
                     23:  * | header | L1 table | cluster0 | cluster1 | ... |
                     24:  * +--------+----------+----------+----------+-----+
                     25:  *
                     26:  * There is a 2-level pagetable for cluster allocation:
                     27:  *
                     28:  *                     +----------+
                     29:  *                     | L1 table |
                     30:  *                     +----------+
                     31:  *                ,------'  |  '------.
                     32:  *           +----------+   |    +----------+
                     33:  *           | L2 table |  ...   | L2 table |
                     34:  *           +----------+        +----------+
                     35:  *       ,------'  |  '------.
                     36:  *  +----------+   |    +----------+
                     37:  *  |   Data   |  ...   |   Data   |
                     38:  *  +----------+        +----------+
                     39:  *
                     40:  * The L1 table is fixed size and always present.  L2 tables are allocated on
                     41:  * demand.  The L1 table size determines the maximum possible image size; it
                     42:  * can be influenced using the cluster_size and table_size values.
                     43:  *
                     44:  * All fields are little-endian on disk.
                     45:  */
                     46: 
                     47: enum {
                     48:     QED_MAGIC = 'Q' | 'E' << 8 | 'D' << 16 | '\0' << 24,
                     49: 
                     50:     /* The image supports a backing file */
                     51:     QED_F_BACKING_FILE = 0x01,
                     52: 
                     53:     /* The image needs a consistency check before use */
                     54:     QED_F_NEED_CHECK = 0x02,
                     55: 
                     56:     /* The backing file format must not be probed, treat as raw image */
                     57:     QED_F_BACKING_FORMAT_NO_PROBE = 0x04,
                     58: 
                     59:     /* Feature bits must be used when the on-disk format changes */
                     60:     QED_FEATURE_MASK = QED_F_BACKING_FILE | /* supported feature bits */
                     61:                        QED_F_NEED_CHECK |
                     62:                        QED_F_BACKING_FORMAT_NO_PROBE,
                     63:     QED_COMPAT_FEATURE_MASK = 0,            /* supported compat feature bits */
                     64:     QED_AUTOCLEAR_FEATURE_MASK = 0,         /* supported autoclear feature bits */
                     65: 
                     66:     /* Data is stored in groups of sectors called clusters.  Cluster size must
                     67:      * be large to avoid keeping too much metadata.  I/O requests that have
                     68:      * sub-cluster size will require read-modify-write.
                     69:      */
                     70:     QED_MIN_CLUSTER_SIZE = 4 * 1024, /* in bytes */
                     71:     QED_MAX_CLUSTER_SIZE = 64 * 1024 * 1024,
                     72:     QED_DEFAULT_CLUSTER_SIZE = 64 * 1024,
                     73: 
                     74:     /* Allocated clusters are tracked using a 2-level pagetable.  Table size is
                     75:      * a multiple of clusters so large maximum image sizes can be supported
                     76:      * without jacking up the cluster size too much.
                     77:      */
                     78:     QED_MIN_TABLE_SIZE = 1,        /* in clusters */
                     79:     QED_MAX_TABLE_SIZE = 16,
                     80:     QED_DEFAULT_TABLE_SIZE = 4,
1.1.1.3   root       81: 
                     82:     /* Delay to flush and clean image after last allocating write completes */
                     83:     QED_NEED_CHECK_TIMEOUT = 5,    /* in seconds */
1.1       root       84: };
                     85: 
                         /*
                          * QED image header.  The file layout comment in this header states
                          * that all fields are little-endian on disk; field order is therefore
                          * part of the format and must not be changed.
                          */
                     86: typedef struct {
                     87:     uint32_t magic;                 /* QED\0 */
                     88: 
                     89:     uint32_t cluster_size;          /* in bytes */
                     90:     uint32_t table_size;            /* for L1 and L2 tables, in clusters */
                     91:     uint32_t header_size;           /* in clusters */
                     92: 
                     93:     uint64_t features;              /* format feature bits */
                     94:     uint64_t compat_features;       /* compatible feature bits */
                     95:     uint64_t autoclear_features;    /* self-resetting feature bits */
                     96: 
                     97:     uint64_t l1_table_offset;       /* in bytes */
                     98:     uint64_t image_size;            /* total logical image size, in bytes */
                     99: 
                    100:     /* the two fields below apply if (features & QED_F_BACKING_FILE) */
                    101:     uint32_t backing_filename_offset; /* in bytes from start of header */
                    102:     uint32_t backing_filename_size;   /* in bytes */
                    103: } QEDHeader;
                    104: 
                         /*
                          * A table of 64-bit offsets.  The zero-length trailing array reserves
                          * no storage in the struct itself; entry storage must come from the
                          * allocation (presumably qed_alloc_table() -- confirm).
                          */
                    105: typedef struct {
                    106:     uint64_t offsets[0];            /* in bytes */
                    107: } QEDTable;
                    108: 
                    109: /* The L2 cache is a simple write-through cache for L2 structures */
                         /* One cache entry: an L2 table plus its list linkage. */
                    110: typedef struct CachedL2Table {
                    111:     QEDTable *table;
                    112:     uint64_t offset;    /* offset=0 indicates an invalid entry */
                    113:     QTAILQ_ENTRY(CachedL2Table) node;   /* linkage in L2TableCache.entries */
                    114:     int ref;            /* presumably a reference count, see
                                                   qed_unref_l2_cache_entry() -- confirm */
                    115: } CachedL2Table;
                    116: 
                         /* Container for cached L2 tables. */
                    117: typedef struct {
                    118:     QTAILQ_HEAD(, CachedL2Table) entries;   /* list of CachedL2Table */
                    119:     unsigned int n_entries;     /* presumably the length of entries -- confirm */
                    120: } L2TableCache;
                    121: 
                         /*
                          * Per-request context passed to the table I/O and cluster lookup
                          * functions declared in this header.
                          */
                    122: typedef struct QEDRequest {
                    123:     CachedL2Table *l2_table;    /* presumably the L2 table in use by this
                                                           request -- confirm */
                    124: } QEDRequest;
                    125: 
/* Bits for QEDAIOCB.flags; may be ORed together */
enum {
    /* set for a write request, clear for a read */
    QED_AIOCB_WRITE = 1 << 0,
    /* zero write, used together with QED_AIOCB_WRITE */
    QED_AIOCB_ZERO  = 1 << 1,
};
        !           130: 
                         /*
                          * State carried by one QED asynchronous request: completion
                          * bookkeeping, the caller's scatter-gather list, and the position
                          * of the cluster range currently being processed.
                          */
1.1       root      131: typedef struct QEDAIOCB {
                    132:     BlockDriverAIOCB common;
                    133:     QEMUBH *bh;
                    134:     int bh_ret;                     /* final return status for completion bh */
                    135:     QSIMPLEQ_ENTRY(QEDAIOCB) next;  /* next request */
1.1.1.5 ! root      136:     int flags;                      /* QED_AIOCB_* bits ORed together */
1.1       root      137:     bool *finished;                 /* signal for cancel completion */
                    138:     uint64_t end_pos;               /* request end on block device, in bytes */
                    139: 
                    140:     /* User scatter-gather list */
                    141:     QEMUIOVector *qiov;
                    142:     size_t qiov_offset;             /* byte count already processed */
                    143: 
                    144:     /* Current cluster scatter-gather list */
                    145:     QEMUIOVector cur_qiov;
                    146:     uint64_t cur_pos;               /* position on block device, in bytes */
                    147:     uint64_t cur_cluster;           /* cluster offset in image file */
                    148:     unsigned int cur_nclusters;     /* number of clusters being accessed */
                    149:     int find_cluster_ret;           /* used for L1/L2 update */
                    150: 
                    151:     QEDRequest request;
                    152: } QEDAIOCB;
                    153: 
                         /*
                          * Driver state for a QED image: the device, a header copy, the L1
                          * table and L2 cache, index helpers, and the allocating write queue.
                          */
                    154: typedef struct {
                    155:     BlockDriverState *bs;           /* device */
                    156:     uint64_t file_size;             /* length of image file, in bytes */
                    157: 
                    158:     QEDHeader header;               /* always cpu-endian */
                    159:     QEDTable *l1_table;
                    160:     L2TableCache l2_cache;          /* l2 table cache */
                    161:     uint32_t table_nelems;          /* presumably entries per table -- confirm */
                    162:     uint32_t l1_shift;              /* right shift used by qed_l1_index() */
                    163:     uint32_t l2_shift;              /* right shift used by qed_l2_index() */
                    164:     uint32_t l2_mask;               /* mask applied by qed_l2_index() after shifting */
                    165: 
                    166:     /* Allocating write request queue */
                    167:     QSIMPLEQ_HEAD(, QEDAIOCB) allocating_write_reqs;
1.1.1.3   root      168:     bool allocating_write_reqs_plugged;
                    169: 
                    170:     /* Periodic flush and clear need check flag */
                    171:     QEMUTimer *need_check_timer;
1.1       root      172: } BDRVQEDState;
                    173: 
                    174: enum {
                    175:     QED_CLUSTER_FOUND,         /* cluster found */
1.1.1.3   root      176:     QED_CLUSTER_ZERO,          /* zero cluster found */
1.1       root      177:     QED_CLUSTER_L2,            /* cluster missing in L2 */
                    178:     QED_CLUSTER_L1,            /* cluster missing in L1 */
                    179: };
                    180: 
                    181: /**
                    182:  * qed_find_cluster() completion callback
                    183:  *
                    184:  * @opaque:     User data for completion callback
                    185:  * @ret:        QED_CLUSTER_FOUND   Success
                    186:  *              QED_CLUSTER_L2      Data cluster unallocated in L2
                    187:  *              QED_CLUSTER_L1      L2 unallocated in L1
                    188:  *              -errno              POSIX error occurred
                    189:  * @offset:     Data cluster offset
                    190:  * @len:        Contiguous bytes starting from cluster offset
                    191:  *
                    192:  * This function is invoked when qed_find_cluster() completes.
                    193:  *
                    194:  * On success ret is QED_CLUSTER_FOUND and offset/len are a contiguous range
                    195:  * in the image file.
                    196:  *
                    197:  * On failure ret is QED_CLUSTER_L2 or QED_CLUSTER_L1 for missing L2 or L1
                    198:  * table offset, respectively.  len is number of contiguous unallocated bytes.
                          *
                          * NOTE(review): QED_CLUSTER_ZERO ("zero cluster found") is defined in
                          * this header but is not listed above; presumably it can also be passed
                          * as @ret -- confirm against the qed_find_cluster() implementation.
                    199:  */
                    200: typedef void QEDFindClusterFunc(void *opaque, int ret, uint64_t offset, size_t len);
                    201: 
                    202: /**
                    203:  * Generic callback for chaining async callbacks
                          *
                          * Pairs a completion function with the opaque pointer intended for it.
                          * NOTE(review): presumably allocated by gencb_alloc() and consumed by
                          * gencb_complete() -- confirm against their definitions.
                    204:  */
                    205: typedef struct {
                    206:     BlockDriverCompletionFunc *cb;  /* completion function */
                    207:     void *opaque;                   /* user data for cb */
                    208: } GenericCB;
                    209: 
                    210: void *gencb_alloc(size_t len, BlockDriverCompletionFunc *cb, void *opaque);
                    211: void gencb_complete(void *opaque, int ret);
                    212: 
                    213: /**
                    214:  * L2 cache functions
                    215:  */
                    216: void qed_init_l2_cache(L2TableCache *l2_cache);
                    217: void qed_free_l2_cache(L2TableCache *l2_cache);
                    218: CachedL2Table *qed_alloc_l2_cache_entry(L2TableCache *l2_cache);
                    219: void qed_unref_l2_cache_entry(CachedL2Table *entry);
                    220: CachedL2Table *qed_find_l2_cache_entry(L2TableCache *l2_cache, uint64_t offset);
                    221: void qed_commit_l2_cache_entry(L2TableCache *l2_cache, CachedL2Table *l2_table);
                    222: 
                    223: /**
                    224:  * Table I/O functions
                    225:  */
                    226: int qed_read_l1_table_sync(BDRVQEDState *s);
                    227: void qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n,
                    228:                         BlockDriverCompletionFunc *cb, void *opaque);
                    229: int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
                    230:                             unsigned int n);
                    231: int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
                    232:                            uint64_t offset);
                    233: void qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset,
                    234:                        BlockDriverCompletionFunc *cb, void *opaque);
                    235: void qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
                    236:                         unsigned int index, unsigned int n, bool flush,
                    237:                         BlockDriverCompletionFunc *cb, void *opaque);
                    238: int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
                    239:                             unsigned int index, unsigned int n, bool flush);
                    240: 
                    241: /**
                    242:  * Cluster functions
                    243:  */
                    244: void qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos,
                    245:                       size_t len, QEDFindClusterFunc *cb, void *opaque);
                    246: 
                    247: /**
                    248:  * Consistency check
                    249:  */
                    250: int qed_check(BDRVQEDState *s, BdrvCheckResult *result, bool fix);
                    251: 
                    252: QEDTable *qed_alloc_table(BDRVQEDState *s);
                    253: 
                    254: /**
                    255:  * Round down to the start of a cluster
                    256:  */
                    257: static inline uint64_t qed_start_of_cluster(BDRVQEDState *s, uint64_t offset)
                    258: {
                    259:     return offset & ~(uint64_t)(s->header.cluster_size - 1);
                    260: }
                    261: 
                    262: static inline uint64_t qed_offset_into_cluster(BDRVQEDState *s, uint64_t offset)
                    263: {
                    264:     return offset & (s->header.cluster_size - 1);
                    265: }
                    266: 
1.1.1.2   root      267: static inline uint64_t qed_bytes_to_clusters(BDRVQEDState *s, uint64_t bytes)
1.1       root      268: {
                    269:     return qed_start_of_cluster(s, bytes + (s->header.cluster_size - 1)) /
                    270:            (s->header.cluster_size - 1);
                    271: }
                    272: 
                    273: static inline unsigned int qed_l1_index(BDRVQEDState *s, uint64_t pos)
                    274: {
                    275:     return pos >> s->l1_shift;
                    276: }
                    277: 
                    278: static inline unsigned int qed_l2_index(BDRVQEDState *s, uint64_t pos)
                    279: {
                    280:     return (pos >> s->l2_shift) & s->l2_mask;
                    281: }
                    282: 
                    283: /**
                    284:  * Test if a cluster offset is valid
                    285:  */
                    286: static inline bool qed_check_cluster_offset(BDRVQEDState *s, uint64_t offset)
                    287: {
                    288:     uint64_t header_size = (uint64_t)s->header.header_size *
                    289:                            s->header.cluster_size;
                    290: 
                    291:     if (offset & (s->header.cluster_size - 1)) {
                    292:         return false;
                    293:     }
                    294:     return offset >= header_size && offset < s->file_size;
                    295: }
                    296: 
                    297: /**
                    298:  * Test if a table offset is valid
                    299:  */
                    300: static inline bool qed_check_table_offset(BDRVQEDState *s, uint64_t offset)
                    301: {
                    302:     uint64_t end_offset = offset + (s->header.table_size - 1) *
                    303:                           s->header.cluster_size;
                    304: 
                    305:     /* Overflow check */
                    306:     if (end_offset <= offset) {
                    307:         return false;
                    308:     }
                    309: 
                    310:     return qed_check_cluster_offset(s, offset) &&
                    311:            qed_check_cluster_offset(s, end_offset);
                    312: }
                    313: 
1.1.1.3   root      314: static inline bool qed_offset_is_cluster_aligned(BDRVQEDState *s,
                    315:                                                  uint64_t offset)
                    316: {
                    317:     if (qed_offset_into_cluster(s, offset)) {
                    318:         return false;
                    319:     }
                    320:     return true;
                    321: }
                    322: 
                    323: static inline bool qed_offset_is_unalloc_cluster(uint64_t offset)
                    324: {
                    325:     if (offset == 0) {
                    326:         return true;
                    327:     }
                    328:     return false;
                    329: }
                    330: 
                    331: static inline bool qed_offset_is_zero_cluster(uint64_t offset)
                    332: {
                    333:     if (offset == 1) {
                    334:         return true;
                    335:     }
                    336:     return false;
                    337: }
                    338: 
1.1       root      339: #endif /* BLOCK_QED_H */
unix.superglobalmegacorp.com
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.