|
|
1.1 root 1: /*
2: * Block driver for the QCOW version 2 format
3: *
4: * Copyright (c) 2004-2006 Fabrice Bellard
5: *
6: * Permission is hereby granted, free of charge, to any person obtaining a copy
7: * of this software and associated documentation files (the "Software"), to deal
8: * in the Software without restriction, including without limitation the rights
9: * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10: * copies of the Software, and to permit persons to whom the Software is
11: * furnished to do so, subject to the following conditions:
12: *
13: * The above copyright notice and this permission notice shall be included in
14: * all copies or substantial portions of the Software.
15: *
16: * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17: * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18: * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19: * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20: * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21: * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22: * THE SOFTWARE.
23: */
24:
25: #include <zlib.h>
26:
27: #include "qemu-common.h"
28: #include "block_int.h"
29: #include "block/qcow2.h"
30:
31: int qcow2_grow_l1_table(BlockDriverState *bs, int min_size)
32: {
33: BDRVQcowState *s = bs->opaque;
34: int new_l1_size, new_l1_size2, ret, i;
35: uint64_t *new_l1_table;
1.1.1.3 ! root 36: int64_t new_l1_table_offset;
1.1 root 37: uint8_t data[12];
38:
39: new_l1_size = s->l1_size;
40: if (min_size <= new_l1_size)
41: return 0;
1.1.1.2 root 42: if (new_l1_size == 0) {
43: new_l1_size = 1;
44: }
1.1 root 45: while (min_size > new_l1_size) {
46: new_l1_size = (new_l1_size * 3 + 1) / 2;
47: }
48: #ifdef DEBUG_ALLOC2
49: printf("grow l1_table from %d to %d\n", s->l1_size, new_l1_size);
50: #endif
51:
52: new_l1_size2 = sizeof(uint64_t) * new_l1_size;
53: new_l1_table = qemu_mallocz(align_offset(new_l1_size2, 512));
54: memcpy(new_l1_table, s->l1_table, s->l1_size * sizeof(uint64_t));
55:
56: /* write new table (align to cluster) */
57: new_l1_table_offset = qcow2_alloc_clusters(bs, new_l1_size2);
1.1.1.3 ! root 58: if (new_l1_table_offset < 0) {
! 59: qemu_free(new_l1_table);
! 60: return new_l1_table_offset;
! 61: }
1.1 root 62:
63: for(i = 0; i < s->l1_size; i++)
64: new_l1_table[i] = cpu_to_be64(new_l1_table[i]);
65: ret = bdrv_pwrite(s->hd, new_l1_table_offset, new_l1_table, new_l1_size2);
66: if (ret != new_l1_size2)
67: goto fail;
68: for(i = 0; i < s->l1_size; i++)
69: new_l1_table[i] = be64_to_cpu(new_l1_table[i]);
70:
71: /* set new table */
72: cpu_to_be32w((uint32_t*)data, new_l1_size);
73: cpu_to_be64w((uint64_t*)(data + 4), new_l1_table_offset);
1.1.1.3 ! root 74: ret = bdrv_pwrite(s->hd, offsetof(QCowHeader, l1_size), data,sizeof(data));
! 75: if (ret != sizeof(data)) {
1.1 root 76: goto fail;
1.1.1.3 ! root 77: }
1.1 root 78: qemu_free(s->l1_table);
79: qcow2_free_clusters(bs, s->l1_table_offset, s->l1_size * sizeof(uint64_t));
80: s->l1_table_offset = new_l1_table_offset;
81: s->l1_table = new_l1_table;
82: s->l1_size = new_l1_size;
83: return 0;
84: fail:
1.1.1.3 ! root 85: qemu_free(new_l1_table);
! 86: qcow2_free_clusters(bs, new_l1_table_offset, new_l1_size2);
! 87: return ret < 0 ? ret : -EIO;
1.1 root 88: }
89:
90: void qcow2_l2_cache_reset(BlockDriverState *bs)
91: {
92: BDRVQcowState *s = bs->opaque;
93:
94: memset(s->l2_cache, 0, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
95: memset(s->l2_cache_offsets, 0, L2_CACHE_SIZE * sizeof(uint64_t));
96: memset(s->l2_cache_counts, 0, L2_CACHE_SIZE * sizeof(uint32_t));
97: }
98:
99: static inline int l2_cache_new_entry(BlockDriverState *bs)
100: {
101: BDRVQcowState *s = bs->opaque;
102: uint32_t min_count;
103: int min_index, i;
104:
105: /* find a new entry in the least used one */
106: min_index = 0;
107: min_count = 0xffffffff;
108: for(i = 0; i < L2_CACHE_SIZE; i++) {
109: if (s->l2_cache_counts[i] < min_count) {
110: min_count = s->l2_cache_counts[i];
111: min_index = i;
112: }
113: }
114: return min_index;
115: }
116:
117: /*
118: * seek_l2_table
119: *
120: * seek l2_offset in the l2_cache table
121: * if not found, return NULL,
122: * if found,
123: * increments the l2 cache hit count of the entry,
124: * if counter overflow, divide by two all counters
125: * return the pointer to the l2 cache entry
126: *
127: */
128:
129: static uint64_t *seek_l2_table(BDRVQcowState *s, uint64_t l2_offset)
130: {
131: int i, j;
132:
133: for(i = 0; i < L2_CACHE_SIZE; i++) {
134: if (l2_offset == s->l2_cache_offsets[i]) {
135: /* increment the hit count */
136: if (++s->l2_cache_counts[i] == 0xffffffff) {
137: for(j = 0; j < L2_CACHE_SIZE; j++) {
138: s->l2_cache_counts[j] >>= 1;
139: }
140: }
141: return s->l2_cache + (i << s->l2_bits);
142: }
143: }
144: return NULL;
145: }
146:
147: /*
148: * l2_load
149: *
150: * Loads a L2 table into memory. If the table is in the cache, the cache
151: * is used; otherwise the L2 table is loaded from the image file.
152: *
153: * Returns a pointer to the L2 table on success, or NULL if the read from
154: * the image file failed.
155: */
156:
157: static uint64_t *l2_load(BlockDriverState *bs, uint64_t l2_offset)
158: {
159: BDRVQcowState *s = bs->opaque;
160: int min_index;
161: uint64_t *l2_table;
162:
163: /* seek if the table for the given offset is in the cache */
164:
165: l2_table = seek_l2_table(s, l2_offset);
166: if (l2_table != NULL)
167: return l2_table;
168:
169: /* not found: load a new entry in the least used one */
170:
171: min_index = l2_cache_new_entry(bs);
172: l2_table = s->l2_cache + (min_index << s->l2_bits);
173: if (bdrv_pread(s->hd, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)) !=
174: s->l2_size * sizeof(uint64_t))
175: return NULL;
176: s->l2_cache_offsets[min_index] = l2_offset;
177: s->l2_cache_counts[min_index] = 1;
178:
179: return l2_table;
180: }
181:
182: /*
183: * Writes one sector of the L1 table to the disk (can't update single entries
184: * and we really don't want bdrv_pread to perform a read-modify-write)
185: */
186: #define L1_ENTRIES_PER_SECTOR (512 / 8)
187: static int write_l1_entry(BDRVQcowState *s, int l1_index)
188: {
189: uint64_t buf[L1_ENTRIES_PER_SECTOR];
190: int l1_start_index;
191: int i;
192:
193: l1_start_index = l1_index & ~(L1_ENTRIES_PER_SECTOR - 1);
194: for (i = 0; i < L1_ENTRIES_PER_SECTOR; i++) {
195: buf[i] = cpu_to_be64(s->l1_table[l1_start_index + i]);
196: }
197:
198: if (bdrv_pwrite(s->hd, s->l1_table_offset + 8 * l1_start_index,
199: buf, sizeof(buf)) != sizeof(buf))
200: {
201: return -1;
202: }
203:
204: return 0;
205: }
206:
207: /*
208: * l2_allocate
209: *
210: * Allocate a new l2 entry in the file. If l1_index points to an already
211: * used entry in the L2 table (i.e. we are doing a copy on write for the L2
212: * table) copy the contents of the old L2 table into the newly allocated one.
213: * Otherwise the new table is initialized with zeros.
214: *
215: */
216:
217: static uint64_t *l2_allocate(BlockDriverState *bs, int l1_index)
218: {
219: BDRVQcowState *s = bs->opaque;
220: int min_index;
221: uint64_t old_l2_offset;
1.1.1.3 ! root 222: uint64_t *l2_table;
! 223: int64_t l2_offset;
1.1 root 224:
225: old_l2_offset = s->l1_table[l1_index];
226:
227: /* allocate a new l2 entry */
228:
229: l2_offset = qcow2_alloc_clusters(bs, s->l2_size * sizeof(uint64_t));
1.1.1.3 ! root 230: if (l2_offset < 0) {
! 231: return NULL;
! 232: }
1.1 root 233:
234: /* update the L1 entry */
235:
236: s->l1_table[l1_index] = l2_offset | QCOW_OFLAG_COPIED;
237: if (write_l1_entry(s, l1_index) < 0) {
238: return NULL;
239: }
240:
241: /* allocate a new entry in the l2 cache */
242:
243: min_index = l2_cache_new_entry(bs);
244: l2_table = s->l2_cache + (min_index << s->l2_bits);
245:
246: if (old_l2_offset == 0) {
247: /* if there was no old l2 table, clear the new table */
248: memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
249: } else {
250: /* if there was an old l2 table, read it from the disk */
251: if (bdrv_pread(s->hd, old_l2_offset,
252: l2_table, s->l2_size * sizeof(uint64_t)) !=
253: s->l2_size * sizeof(uint64_t))
254: return NULL;
255: }
256: /* write the l2 table to the file */
257: if (bdrv_pwrite(s->hd, l2_offset,
258: l2_table, s->l2_size * sizeof(uint64_t)) !=
259: s->l2_size * sizeof(uint64_t))
260: return NULL;
261:
262: /* update the l2 cache entry */
263:
264: s->l2_cache_offsets[min_index] = l2_offset;
265: s->l2_cache_counts[min_index] = 1;
266:
267: return l2_table;
268: }
269:
/*
 * Counts how many L2 entries, beginning at index start, continue the
 * physically contiguous run that begins at l2_table[0].  Entry i belongs to
 * the run when its value (big endian on input, with the bits in mask
 * cleared) equals the value of entry 0 plus i * cluster_size.  At most
 * nb_clusters entries are examined.
 *
 * Returns 0 when entry 0 is zero after masking, otherwise the number of
 * matching entries.
 */
static int count_contiguous_clusters(uint64_t nb_clusters, int cluster_size,
        uint64_t *l2_table, uint64_t start, uint64_t mask)
{
    int idx;
    const uint64_t base = be64_to_cpu(l2_table[0]) & ~mask;

    if (base == 0) {
        return 0;
    }

    idx = start;
    while (idx < start + nb_clusters) {
        uint64_t expected = base + (uint64_t) idx * cluster_size;
        uint64_t actual = be64_to_cpu(l2_table[idx]) & ~mask;

        if (expected != actual) {
            break;
        }
        idx++;
    }

    return (idx - start);
}
285:
/*
 * Returns the length of the run of zero (unallocated) entries at the start
 * of l2_table, looking at no more than nb_clusters entries.
 */
static int count_contiguous_free_clusters(uint64_t nb_clusters, uint64_t *l2_table)
{
    int free_run;

    for (free_run = 0; nb_clusters != 0; nb_clusters--, free_run++) {
        if (l2_table[free_run] != 0) {
            break;
        }
    }

    return free_run;
}
295:
296: /* The crypt function is compatible with the linux cryptoloop
297: algorithm for < 4 GB images. NOTE: out_buf == in_buf is
298: supported */
299: void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
300: uint8_t *out_buf, const uint8_t *in_buf,
301: int nb_sectors, int enc,
302: const AES_KEY *key)
303: {
304: union {
305: uint64_t ll[2];
306: uint8_t b[16];
307: } ivec;
308: int i;
309:
310: for(i = 0; i < nb_sectors; i++) {
311: ivec.ll[0] = cpu_to_le64(sector_num);
312: ivec.ll[1] = 0;
313: AES_cbc_encrypt(in_buf, out_buf, 512, key,
314: ivec.b, enc);
315: sector_num++;
316: in_buf += 512;
317: out_buf += 512;
318: }
319: }
320:
321:
322: static int qcow_read(BlockDriverState *bs, int64_t sector_num,
323: uint8_t *buf, int nb_sectors)
324: {
325: BDRVQcowState *s = bs->opaque;
326: int ret, index_in_cluster, n, n1;
327: uint64_t cluster_offset;
328:
329: while (nb_sectors > 0) {
330: n = nb_sectors;
331: cluster_offset = qcow2_get_cluster_offset(bs, sector_num << 9, &n);
332: index_in_cluster = sector_num & (s->cluster_sectors - 1);
333: if (!cluster_offset) {
334: if (bs->backing_hd) {
335: /* read from the base image */
336: n1 = qcow2_backing_read1(bs->backing_hd, sector_num, buf, n);
337: if (n1 > 0) {
338: ret = bdrv_read(bs->backing_hd, sector_num, buf, n1);
339: if (ret < 0)
340: return -1;
341: }
342: } else {
343: memset(buf, 0, 512 * n);
344: }
345: } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
346: if (qcow2_decompress_cluster(s, cluster_offset) < 0)
347: return -1;
348: memcpy(buf, s->cluster_cache + index_in_cluster * 512, 512 * n);
349: } else {
350: ret = bdrv_pread(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512);
351: if (ret != n * 512)
352: return -1;
353: if (s->crypt_method) {
354: qcow2_encrypt_sectors(s, sector_num, buf, buf, n, 0,
355: &s->aes_decrypt_key);
356: }
357: }
358: nb_sectors -= n;
359: sector_num += n;
360: buf += n * 512;
361: }
362: return 0;
363: }
364:
365: static int copy_sectors(BlockDriverState *bs, uint64_t start_sect,
366: uint64_t cluster_offset, int n_start, int n_end)
367: {
368: BDRVQcowState *s = bs->opaque;
369: int n, ret;
370:
371: n = n_end - n_start;
372: if (n <= 0)
373: return 0;
374: ret = qcow_read(bs, start_sect + n_start, s->cluster_data, n);
375: if (ret < 0)
376: return ret;
377: if (s->crypt_method) {
378: qcow2_encrypt_sectors(s, start_sect + n_start,
379: s->cluster_data,
380: s->cluster_data, n, 1,
381: &s->aes_encrypt_key);
382: }
383: ret = bdrv_write(s->hd, (cluster_offset >> 9) + n_start,
384: s->cluster_data, n);
385: if (ret < 0)
386: return ret;
387: return 0;
388: }
389:
390:
391: /*
392: * get_cluster_offset
393: *
394: * For a given offset of the disk image, return cluster offset in
395: * qcow2 file.
396: *
397: * on entry, *num is the number of contiguous clusters we'd like to
398: * access following offset.
399: *
400: * on exit, *num is the number of contiguous clusters we can read.
401: *
402: * Return 1, if the offset is found
403: * Return 0, otherwise.
404: *
405: */
406:
407: uint64_t qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
408: int *num)
409: {
410: BDRVQcowState *s = bs->opaque;
1.1.1.2 root 411: unsigned int l1_index, l2_index;
1.1 root 412: uint64_t l2_offset, *l2_table, cluster_offset;
413: int l1_bits, c;
1.1.1.2 root 414: unsigned int index_in_cluster, nb_clusters;
415: uint64_t nb_available, nb_needed;
1.1 root 416:
417: index_in_cluster = (offset >> 9) & (s->cluster_sectors - 1);
418: nb_needed = *num + index_in_cluster;
419:
420: l1_bits = s->l2_bits + s->cluster_bits;
421:
422: /* compute how many bytes there are between the offset and
423: * the end of the l1 entry
424: */
425:
1.1.1.2 root 426: nb_available = (1ULL << l1_bits) - (offset & ((1ULL << l1_bits) - 1));
1.1 root 427:
428: /* compute the number of available sectors */
429:
430: nb_available = (nb_available >> 9) + index_in_cluster;
431:
432: if (nb_needed > nb_available) {
433: nb_needed = nb_available;
434: }
435:
436: cluster_offset = 0;
437:
438: /* seek the the l2 offset in the l1 table */
439:
440: l1_index = offset >> l1_bits;
441: if (l1_index >= s->l1_size)
442: goto out;
443:
444: l2_offset = s->l1_table[l1_index];
445:
446: /* seek the l2 table of the given l2 offset */
447:
448: if (!l2_offset)
449: goto out;
450:
451: /* load the l2 table in memory */
452:
453: l2_offset &= ~QCOW_OFLAG_COPIED;
454: l2_table = l2_load(bs, l2_offset);
455: if (l2_table == NULL)
456: return 0;
457:
458: /* find the cluster offset for the given disk offset */
459:
460: l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
461: cluster_offset = be64_to_cpu(l2_table[l2_index]);
462: nb_clusters = size_to_clusters(s, nb_needed << 9);
463:
464: if (!cluster_offset) {
465: /* how many empty clusters ? */
466: c = count_contiguous_free_clusters(nb_clusters, &l2_table[l2_index]);
467: } else {
468: /* how many allocated clusters ? */
469: c = count_contiguous_clusters(nb_clusters, s->cluster_size,
470: &l2_table[l2_index], 0, QCOW_OFLAG_COPIED);
471: }
472:
473: nb_available = (c * s->cluster_sectors);
474: out:
475: if (nb_available > nb_needed)
476: nb_available = nb_needed;
477:
478: *num = nb_available - index_in_cluster;
479:
480: return cluster_offset & ~QCOW_OFLAG_COPIED;
481: }
482:
483: /*
484: * get_cluster_table
485: *
486: * for a given disk offset, load (and allocate if needed)
487: * the l2 table.
488: *
489: * the l2 table offset in the qcow2 file and the cluster index
490: * in the l2 table are given to the caller.
491: *
1.1.1.3 ! root 492: * Returns 0 on success, -errno in failure case
1.1 root 493: */
494: static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
495: uint64_t **new_l2_table,
496: uint64_t *new_l2_offset,
497: int *new_l2_index)
498: {
499: BDRVQcowState *s = bs->opaque;
1.1.1.2 root 500: unsigned int l1_index, l2_index;
1.1 root 501: uint64_t l2_offset, *l2_table;
1.1.1.2 root 502: int ret;
1.1 root 503:
504: /* seek the the l2 offset in the l1 table */
505:
506: l1_index = offset >> (s->l2_bits + s->cluster_bits);
507: if (l1_index >= s->l1_size) {
508: ret = qcow2_grow_l1_table(bs, l1_index + 1);
1.1.1.3 ! root 509: if (ret < 0) {
! 510: return ret;
! 511: }
1.1 root 512: }
513: l2_offset = s->l1_table[l1_index];
514:
515: /* seek the l2 table of the given l2 offset */
516:
517: if (l2_offset & QCOW_OFLAG_COPIED) {
518: /* load the l2 table in memory */
519: l2_offset &= ~QCOW_OFLAG_COPIED;
520: l2_table = l2_load(bs, l2_offset);
1.1.1.3 ! root 521: if (l2_table == NULL) {
! 522: return -EIO;
! 523: }
1.1 root 524: } else {
525: if (l2_offset)
526: qcow2_free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t));
527: l2_table = l2_allocate(bs, l1_index);
1.1.1.3 ! root 528: if (l2_table == NULL) {
! 529: return -EIO;
! 530: }
1.1 root 531: l2_offset = s->l1_table[l1_index] & ~QCOW_OFLAG_COPIED;
532: }
533:
534: /* find the cluster offset for the given disk offset */
535:
536: l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
537:
538: *new_l2_table = l2_table;
539: *new_l2_offset = l2_offset;
540: *new_l2_index = l2_index;
541:
1.1.1.3 ! root 542: return 0;
1.1 root 543: }
544:
545: /*
546: * alloc_compressed_cluster_offset
547: *
548: * For a given offset of the disk image, return cluster offset in
549: * qcow2 file.
550: *
551: * If the offset is not found, allocate a new compressed cluster.
552: *
553: * Return the cluster offset if successful,
554: * Return 0, otherwise.
555: *
556: */
557:
558: uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
559: uint64_t offset,
560: int compressed_size)
561: {
562: BDRVQcowState *s = bs->opaque;
563: int l2_index, ret;
1.1.1.3 ! root 564: uint64_t l2_offset, *l2_table;
! 565: int64_t cluster_offset;
1.1 root 566: int nb_csectors;
567:
568: ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index);
1.1.1.3 ! root 569: if (ret < 0) {
1.1 root 570: return 0;
1.1.1.3 ! root 571: }
1.1 root 572:
573: cluster_offset = be64_to_cpu(l2_table[l2_index]);
574: if (cluster_offset & QCOW_OFLAG_COPIED)
575: return cluster_offset & ~QCOW_OFLAG_COPIED;
576:
577: if (cluster_offset)
578: qcow2_free_any_clusters(bs, cluster_offset, 1);
579:
580: cluster_offset = qcow2_alloc_bytes(bs, compressed_size);
1.1.1.3 ! root 581: if (cluster_offset < 0) {
! 582: return 0;
! 583: }
! 584:
1.1 root 585: nb_csectors = ((cluster_offset + compressed_size - 1) >> 9) -
586: (cluster_offset >> 9);
587:
588: cluster_offset |= QCOW_OFLAG_COMPRESSED |
589: ((uint64_t)nb_csectors << s->csize_shift);
590:
591: /* update L2 table */
592:
593: /* compressed clusters never have the copied flag */
594:
595: l2_table[l2_index] = cpu_to_be64(cluster_offset);
596: if (bdrv_pwrite(s->hd,
597: l2_offset + l2_index * sizeof(uint64_t),
598: l2_table + l2_index,
599: sizeof(uint64_t)) != sizeof(uint64_t))
600: return 0;
601:
602: return cluster_offset;
603: }
604:
605: /*
606: * Write L2 table updates to disk, writing whole sectors to avoid a
607: * read-modify-write in bdrv_pwrite
608: */
609: #define L2_ENTRIES_PER_SECTOR (512 / 8)
610: static int write_l2_entries(BDRVQcowState *s, uint64_t *l2_table,
611: uint64_t l2_offset, int l2_index, int num)
612: {
613: int l2_start_index = l2_index & ~(L1_ENTRIES_PER_SECTOR - 1);
614: int start_offset = (8 * l2_index) & ~511;
615: int end_offset = (8 * (l2_index + num) + 511) & ~511;
616: size_t len = end_offset - start_offset;
617:
618: if (bdrv_pwrite(s->hd, l2_offset + start_offset, &l2_table[l2_start_index],
619: len) != len)
620: {
621: return -1;
622: }
623:
624: return 0;
625: }
626:
1.1.1.3 ! root 627: int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
1.1 root 628: {
629: BDRVQcowState *s = bs->opaque;
630: int i, j = 0, l2_index, ret;
631: uint64_t *old_cluster, start_sect, l2_offset, *l2_table;
1.1.1.3 ! root 632: uint64_t cluster_offset = m->cluster_offset;
1.1 root 633:
634: if (m->nb_clusters == 0)
635: return 0;
636:
637: old_cluster = qemu_malloc(m->nb_clusters * sizeof(uint64_t));
638:
639: /* copy content of unmodified sectors */
640: start_sect = (m->offset & ~(s->cluster_size - 1)) >> 9;
641: if (m->n_start) {
642: ret = copy_sectors(bs, start_sect, cluster_offset, 0, m->n_start);
643: if (ret < 0)
644: goto err;
645: }
646:
647: if (m->nb_available & (s->cluster_sectors - 1)) {
648: uint64_t end = m->nb_available & ~(uint64_t)(s->cluster_sectors - 1);
649: ret = copy_sectors(bs, start_sect + end, cluster_offset + (end << 9),
650: m->nb_available - end, s->cluster_sectors);
651: if (ret < 0)
652: goto err;
653: }
654:
655: /* update L2 table */
1.1.1.3 ! root 656: ret = get_cluster_table(bs, m->offset, &l2_table, &l2_offset, &l2_index);
! 657: if (ret < 0) {
1.1 root 658: goto err;
1.1.1.3 ! root 659: }
1.1 root 660:
661: for (i = 0; i < m->nb_clusters; i++) {
662: /* if two concurrent writes happen to the same unallocated cluster
663: * each write allocates separate cluster and writes data concurrently.
664: * The first one to complete updates l2 table with pointer to its
665: * cluster the second one has to do RMW (which is done above by
666: * copy_sectors()), update l2 table with its cluster pointer and free
667: * old cluster. This is what this loop does */
668: if(l2_table[l2_index + i] != 0)
669: old_cluster[j++] = l2_table[l2_index + i];
670:
671: l2_table[l2_index + i] = cpu_to_be64((cluster_offset +
672: (i << s->cluster_bits)) | QCOW_OFLAG_COPIED);
673: }
674:
675: if (write_l2_entries(s, l2_table, l2_offset, l2_index, m->nb_clusters) < 0) {
676: ret = -1;
677: goto err;
678: }
679:
680: for (i = 0; i < j; i++)
681: qcow2_free_any_clusters(bs,
682: be64_to_cpu(old_cluster[i]) & ~QCOW_OFLAG_COPIED, 1);
683:
684: ret = 0;
685: err:
686: qemu_free(old_cluster);
687: return ret;
688: }
689:
690: /*
691: * alloc_cluster_offset
692: *
1.1.1.3 ! root 693: * For a given offset of the disk image, return cluster offset in qcow2 file.
1.1 root 694: * If the offset is not found, allocate a new cluster.
695: *
1.1.1.3 ! root 696: * If the cluster was already allocated, m->nb_clusters is set to 0,
! 697: * m->depends_on is set to NULL and the other fields in m are meaningless.
1.1 root 698: *
1.1.1.3 ! root 699: * If the cluster is newly allocated, m->nb_clusters is set to the number of
! 700: * contiguous clusters that have been allocated. This may be 0 if the request
! 701: * conflict with another write request in flight; in this case, m->depends_on
! 702: * is set and the remaining fields of m are meaningless.
! 703: *
! 704: * If m->nb_clusters is non-zero, the other fields of m are valid and contain
! 705: * information about the first allocated cluster.
! 706: *
! 707: * Return 0 on success and -errno in error cases
1.1 root 708: */
1.1.1.3 ! root 709: int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
! 710: int n_start, int n_end, int *num, QCowL2Meta *m)
1.1 root 711: {
712: BDRVQcowState *s = bs->opaque;
713: int l2_index, ret;
1.1.1.3 ! root 714: uint64_t l2_offset, *l2_table;
! 715: int64_t cluster_offset;
1.1.1.2 root 716: unsigned int nb_clusters, i = 0;
1.1 root 717: QCowL2Meta *old_alloc;
718:
719: ret = get_cluster_table(bs, offset, &l2_table, &l2_offset, &l2_index);
1.1.1.3 ! root 720: if (ret < 0) {
! 721: return ret;
! 722: }
1.1 root 723:
724: nb_clusters = size_to_clusters(s, n_end << 9);
725:
726: nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
727:
728: cluster_offset = be64_to_cpu(l2_table[l2_index]);
729:
730: /* We keep all QCOW_OFLAG_COPIED clusters */
731:
732: if (cluster_offset & QCOW_OFLAG_COPIED) {
733: nb_clusters = count_contiguous_clusters(nb_clusters, s->cluster_size,
734: &l2_table[l2_index], 0, 0);
735:
736: cluster_offset &= ~QCOW_OFLAG_COPIED;
737: m->nb_clusters = 0;
1.1.1.3 ! root 738: m->depends_on = NULL;
1.1 root 739:
740: goto out;
741: }
742:
743: /* for the moment, multiple compressed clusters are not managed */
744:
745: if (cluster_offset & QCOW_OFLAG_COMPRESSED)
746: nb_clusters = 1;
747:
748: /* how many available clusters ? */
749:
750: while (i < nb_clusters) {
751: i += count_contiguous_clusters(nb_clusters - i, s->cluster_size,
752: &l2_table[l2_index], i, 0);
1.1.1.3 ! root 753: if ((i >= nb_clusters) || be64_to_cpu(l2_table[l2_index + i])) {
1.1 root 754: break;
1.1.1.3 ! root 755: }
1.1 root 756:
757: i += count_contiguous_free_clusters(nb_clusters - i,
758: &l2_table[l2_index + i]);
1.1.1.3 ! root 759: if (i >= nb_clusters) {
! 760: break;
! 761: }
1.1 root 762:
763: cluster_offset = be64_to_cpu(l2_table[l2_index + i]);
764:
765: if ((cluster_offset & QCOW_OFLAG_COPIED) ||
766: (cluster_offset & QCOW_OFLAG_COMPRESSED))
767: break;
768: }
1.1.1.3 ! root 769: assert(i <= nb_clusters);
1.1 root 770: nb_clusters = i;
771:
772: /*
773: * Check if there already is an AIO write request in flight which allocates
774: * the same cluster. In this case we need to wait until the previous
775: * request has completed and updated the L2 table accordingly.
776: */
1.1.1.2 root 777: QLIST_FOREACH(old_alloc, &s->cluster_allocs, next_in_flight) {
1.1 root 778:
779: uint64_t end_offset = offset + nb_clusters * s->cluster_size;
780: uint64_t old_offset = old_alloc->offset;
781: uint64_t old_end_offset = old_alloc->offset +
782: old_alloc->nb_clusters * s->cluster_size;
783:
784: if (end_offset < old_offset || offset > old_end_offset) {
785: /* No intersection */
786: } else {
787: if (offset < old_offset) {
788: /* Stop at the start of a running allocation */
789: nb_clusters = (old_offset - offset) >> s->cluster_bits;
790: } else {
791: nb_clusters = 0;
792: }
793:
794: if (nb_clusters == 0) {
795: /* Set dependency and wait for a callback */
796: m->depends_on = old_alloc;
797: m->nb_clusters = 0;
798: *num = 0;
799: return 0;
800: }
801: }
802: }
803:
804: if (!nb_clusters) {
805: abort();
806: }
807:
1.1.1.2 root 808: QLIST_INSERT_HEAD(&s->cluster_allocs, m, next_in_flight);
1.1 root 809:
810: /* allocate a new cluster */
811:
812: cluster_offset = qcow2_alloc_clusters(bs, nb_clusters * s->cluster_size);
1.1.1.3 ! root 813: if (cluster_offset < 0) {
! 814: return cluster_offset;
! 815: }
1.1 root 816:
817: /* save info needed for meta data update */
818: m->offset = offset;
819: m->n_start = n_start;
820: m->nb_clusters = nb_clusters;
821:
822: out:
823: m->nb_available = MIN(nb_clusters << (s->cluster_bits - 9), n_end);
1.1.1.3 ! root 824: m->cluster_offset = cluster_offset;
1.1 root 825:
826: *num = m->nb_available - n_start;
827:
1.1.1.3 ! root 828: return 0;
1.1 root 829: }
830:
831: static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
832: const uint8_t *buf, int buf_size)
833: {
834: z_stream strm1, *strm = &strm1;
835: int ret, out_len;
836:
837: memset(strm, 0, sizeof(*strm));
838:
839: strm->next_in = (uint8_t *)buf;
840: strm->avail_in = buf_size;
841: strm->next_out = out_buf;
842: strm->avail_out = out_buf_size;
843:
844: ret = inflateInit2(strm, -12);
845: if (ret != Z_OK)
846: return -1;
847: ret = inflate(strm, Z_FINISH);
848: out_len = strm->next_out - out_buf;
849: if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) ||
850: out_len != out_buf_size) {
851: inflateEnd(strm);
852: return -1;
853: }
854: inflateEnd(strm);
855: return 0;
856: }
857:
858: int qcow2_decompress_cluster(BDRVQcowState *s, uint64_t cluster_offset)
859: {
860: int ret, csize, nb_csectors, sector_offset;
861: uint64_t coffset;
862:
863: coffset = cluster_offset & s->cluster_offset_mask;
864: if (s->cluster_cache_offset != coffset) {
865: nb_csectors = ((cluster_offset >> s->csize_shift) & s->csize_mask) + 1;
866: sector_offset = coffset & 511;
867: csize = nb_csectors * 512 - sector_offset;
868: ret = bdrv_read(s->hd, coffset >> 9, s->cluster_data, nb_csectors);
869: if (ret < 0) {
870: return -1;
871: }
872: if (decompress_buffer(s->cluster_cache, s->cluster_size,
873: s->cluster_data + sector_offset, csize) < 0) {
874: return -1;
875: }
876: s->cluster_cache_offset = coffset;
877: }
878: return 0;
879: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.