1 /*
2 * kernel/lvm-snap.c
3 *
4 * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
5 * Heinz Mauelshagen, Sistina Software (persistent snapshots)
6 *
7 * LVM snapshot driver is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2, or (at your option)
10 * any later version.
11 *
12 * LVM snapshot driver is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with GNU CC; see the file COPYING. If not, write to
19 * the Free Software Foundation, 59 Temple Place - Suite 330,
20 * Boston, MA 02111-1307, USA.
21 *
22 */
23
24 #include <linux/kernel.h>
25 #include <linux/vmalloc.h>
26 #include <linux/blkdev.h>
27 #include <linux/smp_lock.h>
28 #include <linux/types.h>
29 #include <linux/iobuf.h>
30 #include <linux/lvm.h>
31
32
33 static char *lvm_snap_version __attribute__ ((unused)) = "LVM 0.9 snapshot code (13/11/2000)\n";
34
35 extern const char *const lvm_name;
36 extern int lvm_blocksizes[];
37
38 void lvm_snapshot_release(lv_t *);
39
40 uint lvm_pv_get_number(vg_t * vg, kdev_t rdev)
41 {
42 uint p;
43
44 for ( p = 0; p < vg->pv_max; p++)
45 {
46 if ( vg->pv[p] == NULL) continue;
47 if ( vg->pv[p]->pv_dev == rdev) break;
48 }
49
50 return vg->pv[p]->pv_number;
51 }
52
53
/*
 * Bucket index of an exception: the number of the chunk containing "block"
 * is XORed with HASHDEV() of the device and the result is reduced with "mask".
 */
#define hashfn(dev, block, mask, chunk_size)			\
	((((block) / (chunk_size)) ^ HASHDEV(dev)) & (mask))
56
57 static inline lv_block_exception_t *
58 lvm_find_exception_table(kdev_t org_dev, unsigned long org_start, lv_t * lv)
59 {
60 struct list_head * hash_table = lv->lv_snapshot_hash_table, * next;
61 unsigned long mask = lv->lv_snapshot_hash_mask;
62 int chunk_size = lv->lv_chunk_size;
63 lv_block_exception_t * ret;
64 int i = 0;
65
66 hash_table = &hash_table[hashfn(org_dev, org_start, mask, chunk_size)];
67 ret = NULL;
68 for (next = hash_table->next; next != hash_table; next = next->next)
69 {
70 lv_block_exception_t * exception;
71
72 exception = list_entry(next, lv_block_exception_t, hash);
73 if (exception->rsector_org == org_start &&
74 exception->rdev_org == org_dev)
75 {
76 if (i)
77 {
78 /* fun, isn't it? :) */
79 list_del(next);
80 list_add(next, hash_table);
81 }
82 ret = exception;
83 break;
84 }
85 i++;
86 }
87 return ret;
88 }
89
90 inline void lvm_hash_link(lv_block_exception_t * exception,
91 kdev_t org_dev, unsigned long org_start,
92 lv_t * lv)
93 {
94 struct list_head * hash_table = lv->lv_snapshot_hash_table;
95 unsigned long mask = lv->lv_snapshot_hash_mask;
96 int chunk_size = lv->lv_chunk_size;
97
98 hash_table = &hash_table[hashfn(org_dev, org_start, mask, chunk_size)];
99 list_add(&exception->hash, hash_table);
100 }
101
102 int lvm_snapshot_remap_block(kdev_t * org_dev, unsigned long * org_sector,
103 unsigned long pe_start, lv_t * lv)
104 {
105 int ret;
106 unsigned long pe_off, pe_adjustment, __org_start;
107 kdev_t __org_dev;
108 int chunk_size = lv->lv_chunk_size;
109 lv_block_exception_t * exception;
110
111 pe_off = pe_start % chunk_size;
112 pe_adjustment = (*org_sector-pe_off) % chunk_size;
113 __org_start = *org_sector - pe_adjustment;
114 __org_dev = *org_dev;
115 ret = 0;
116 exception = lvm_find_exception_table(__org_dev, __org_start, lv);
117 if (exception)
118 {
119 *org_dev = exception->rdev_new;
120 *org_sector = exception->rsector_new + pe_adjustment;
121 ret = 1;
122 }
123 return ret;
124 }
125
126 void lvm_drop_snapshot(lv_t * lv_snap, const char * reason)
127 {
128 kdev_t last_dev;
129 int i;
130
131 /* no exception storage space available for this snapshot
132 or error on this snapshot --> release it */
133 invalidate_buffers(lv_snap->lv_dev);
134
135 for (i = last_dev = 0; i < lv_snap->lv_remap_ptr; i++) {
136 if ( lv_snap->lv_block_exception[i].rdev_new != last_dev) {
137 last_dev = lv_snap->lv_block_exception[i].rdev_new;
138 invalidate_buffers(last_dev);
139 }
140 }
141
142 lvm_snapshot_release(lv_snap);
143
144 printk(KERN_INFO
145 "%s -- giving up to snapshot %s on %s due %s\n",
146 lvm_name, lv_snap->lv_snapshot_org->lv_name, lv_snap->lv_name,
147 reason);
148 }
149
/*
 * Fill blocks[] with consecutive block numbers covering a run of sectors.
 *
 * @blocks:     output array; receives nr_sectors / (blocksize / 512) entries
 * @start:      first sector (512-byte units) of the run
 * @nr_sectors: length of the run in sectors
 * @blocksize:  block size in bytes
 *
 * The first entry is start converted to block units, every following entry
 * is one higher.  A blocksize below 512 bytes yields zero sectors per block;
 * in that case nothing is written instead of dividing by zero.
 */
static inline void lvm_snapshot_prepare_blocks(unsigned long * blocks,
					       unsigned long start,
					       int nr_sectors,
					       int blocksize)
{
	int i, sectors_per_block, nr_blocks;

	sectors_per_block = blocksize >> 9;
	if (sectors_per_block <= 0)
		return;

	nr_blocks = nr_sectors / sectors_per_block;
	start /= sectors_per_block;

	for (i = 0; i < nr_blocks; i++)
		blocks[i] = start + i;
}
164
165 inline int lvm_get_blksize(kdev_t dev)
166 {
167 int correct_size = BLOCK_SIZE, i, major;
168
169 major = MAJOR(dev);
170 if (blksize_size[major])
171 {
172 i = blksize_size[major][MINOR(dev)];
173 if (i)
174 correct_size = i;
175 }
176 return correct_size;
177 }
178
#ifdef DEBUG_SNAPSHOT
/*
 * Debug helper: for every block in the sector range [start, start + nr) of
 * dev, look the buffer up with get_hash_table() and bforget() it if found.
 * The block size is taken from lvm_blocksizes[] for the minor of dev.
 */
static inline void invalidate_snap_cache(unsigned long start, unsigned long nr,
					 kdev_t dev)
{
	int blksize = lvm_blocksizes[MINOR(dev)];
	int sectors_per_block = blksize >> 9;
	unsigned long block = start / sectors_per_block;
	unsigned long left = nr / sectors_per_block;

	for (; left > 0; left--, block++) {
		struct buffer_head *bh = get_hash_table(dev, block, blksize);

		if (bh)
			bforget(bh);
	}
}
#endif
200
201
202 void lvm_snapshot_fill_COW_page(vg_t * vg, lv_t * lv_snap)
203 {
204 int id = 0, is = lv_snap->lv_remap_ptr;
205 ulong blksize_snap;
206 lv_COW_table_disk_t * lv_COW_table =
207 ( lv_COW_table_disk_t *) page_address(lv_snap->lv_COW_table_page);
208
209 if (is == 0) return;
210 is--;
211 blksize_snap = lvm_get_blksize(lv_snap->lv_block_exception[is].rdev_new);
212 is -= is % (blksize_snap / sizeof(lv_COW_table_disk_t));
213
214 memset(lv_COW_table, 0, blksize_snap);
215 for ( ; is < lv_snap->lv_remap_ptr; is++, id++) {
216 /* store new COW_table entry */
217 lv_COW_table[id].pv_org_number = LVM_TO_DISK64(lvm_pv_get_number(vg, lv_snap->lv_block_exception[is].rdev_org));
218 lv_COW_table[id].pv_org_rsector = LVM_TO_DISK64(lv_snap->lv_block_exception[is].rsector_org);
219 lv_COW_table[id].pv_snap_number = LVM_TO_DISK64(lvm_pv_get_number(vg, lv_snap->lv_block_exception[is].rdev_new));
220 lv_COW_table[id].pv_snap_rsector = LVM_TO_DISK64(lv_snap->lv_block_exception[is].rsector_new);
221 }
222 }
223
224
225 /*
226 * writes a COW exception table sector to disk (HM)
227 *
228 */
229
230 int lvm_write_COW_table_block(vg_t * vg,
231 lv_t * lv_snap)
232 {
233 int blksize_snap;
234 int end_of_table;
235 int idx = lv_snap->lv_remap_ptr, idx_COW_table;
236 int nr_pages_tmp;
237 int length_tmp;
238 ulong snap_pe_start, COW_table_sector_offset,
239 COW_entries_per_pe, COW_chunks_per_pe, COW_entries_per_block;
240 ulong blocks[1];
241 const char * reason;
242 kdev_t snap_phys_dev;
243 struct kiobuf * iobuf = lv_snap->lv_iobuf;
244 struct page * page_tmp;
245 lv_COW_table_disk_t * lv_COW_table =
246 ( lv_COW_table_disk_t *) page_address(lv_snap->lv_COW_table_page);
247
248 idx--;
249
250 COW_chunks_per_pe = LVM_GET_COW_TABLE_CHUNKS_PER_PE(vg, lv_snap);
251 COW_entries_per_pe = LVM_GET_COW_TABLE_ENTRIES_PER_PE(vg, lv_snap);
252
253 /* get physical addresse of destination chunk */
254 snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new;
255 snap_pe_start = lv_snap->lv_block_exception[idx - (idx % COW_entries_per_pe)].rsector_new - lv_snap->lv_chunk_size;
256
257 blksize_snap = lvm_get_blksize(snap_phys_dev);
258
259 COW_entries_per_block = blksize_snap / sizeof(lv_COW_table_disk_t);
260 idx_COW_table = idx % COW_entries_per_pe % COW_entries_per_block;
261
262 if ( idx_COW_table == 0) memset(lv_COW_table, 0, blksize_snap);
263
264 /* sector offset into the on disk COW table */
265 COW_table_sector_offset = (idx % COW_entries_per_pe) / (SECTOR_SIZE / sizeof(lv_COW_table_disk_t));
266
267 /* COW table block to write next */
268 blocks[0] = (snap_pe_start + COW_table_sector_offset) >> (blksize_snap >> 10);
269
270 /* store new COW_table entry */
271 lv_COW_table[idx_COW_table].pv_org_number = LVM_TO_DISK64(lvm_pv_get_number(vg, lv_snap->lv_block_exception[idx].rdev_org));
272 lv_COW_table[idx_COW_table].pv_org_rsector = LVM_TO_DISK64(lv_snap->lv_block_exception[idx].rsector_org);
273 lv_COW_table[idx_COW_table].pv_snap_number = LVM_TO_DISK64(lvm_pv_get_number(vg, snap_phys_dev));
274 lv_COW_table[idx_COW_table].pv_snap_rsector = LVM_TO_DISK64(lv_snap->lv_block_exception[idx].rsector_new);
275
276 length_tmp = iobuf->length;
277 iobuf->length = blksize_snap;
278 page_tmp = iobuf->maplist[0];
279 iobuf->maplist[0] = lv_snap->lv_COW_table_page;
280 nr_pages_tmp = iobuf->nr_pages;
281 iobuf->nr_pages = 1;
282
283 if (brw_kiovec(WRITE, 1, &iobuf, snap_phys_dev,
284 blocks, blksize_snap) != blksize_snap)
285 goto fail_raw_write;
286
287
288 /* initialization of next COW exception table block with zeroes */
289 end_of_table = idx % COW_entries_per_pe == COW_entries_per_pe - 1;
290 if (idx_COW_table % COW_entries_per_block == COW_entries_per_block - 1 || end_of_table)
291 {
292 /* don't go beyond the end */
293 if (idx + 1 >= lv_snap->lv_remap_end) goto good_out;
294
295 memset(lv_COW_table, 0, blksize_snap);
296
297 if (end_of_table)
298 {
299 idx++;
300 snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new;
301 snap_pe_start = lv_snap->lv_block_exception[idx - (idx % COW_entries_per_pe)].rsector_new - lv_snap->lv_chunk_size;
302 blksize_snap = lvm_get_blksize(snap_phys_dev);
303 blocks[0] = snap_pe_start >> (blksize_snap >> 10);
304 } else blocks[0]++;
305
306 if (brw_kiovec(WRITE, 1, &iobuf, snap_phys_dev,
307 blocks, blksize_snap) != blksize_snap)
308 goto fail_raw_write;
309 }
310
311
312 good_out:
313 iobuf->length = length_tmp;
314 iobuf->maplist[0] = page_tmp;
315 iobuf->nr_pages = nr_pages_tmp;
316 return 0;
317
318 /* slow path */
319 out:
320 lvm_drop_snapshot(lv_snap, reason);
321 return 1;
322
323 fail_raw_write:
324 reason = "write error";
325 goto out;
326 }
327
328 /*
329 * copy on write handler for one snapshot logical volume
330 *
331 * read the original blocks and store it/them on the new one(s).
332 * if there is no exception storage space free any longer --> release snapshot.
333 *
334 * this routine gets called for each _first_ write to a physical chunk.
335 */
336 int lvm_snapshot_COW(kdev_t org_phys_dev,
337 unsigned long org_phys_sector,
338 unsigned long org_pe_start,
339 unsigned long org_virt_sector,
340 lv_t * lv_snap)
341 {
342 const char * reason;
343 unsigned long org_start, snap_start, snap_phys_dev, virt_start, pe_off;
344 int idx = lv_snap->lv_remap_ptr, chunk_size = lv_snap->lv_chunk_size;
345 struct kiobuf * iobuf;
346 unsigned long blocks[KIO_MAX_SECTORS];
347 int blksize_snap, blksize_org, min_blksize, max_blksize;
348 int max_sectors, nr_sectors;
349
350 /* check if we are out of snapshot space */
351 if (idx >= lv_snap->lv_remap_end)
352 goto fail_out_of_space;
353
354 /* calculate physical boundaries of source chunk */
355 pe_off = org_pe_start % chunk_size;
356 org_start = org_phys_sector - ((org_phys_sector-pe_off) % chunk_size);
357 virt_start = org_virt_sector - (org_phys_sector - org_start);
358
359 /* calculate physical boundaries of destination chunk */
360 snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new;
361 snap_start = lv_snap->lv_block_exception[idx].rsector_new;
362
363 #ifdef DEBUG_SNAPSHOT
364 printk(KERN_INFO
365 "%s -- COW: "
366 "org %02d:%02d faulting %lu start %lu, "
367 "snap %02d:%02d start %lu, "
368 "size %d, pe_start %lu pe_off %lu, virt_sec %lu\n",
369 lvm_name,
370 MAJOR(org_phys_dev), MINOR(org_phys_dev), org_phys_sector,
371 org_start,
372 MAJOR(snap_phys_dev), MINOR(snap_phys_dev), snap_start,
373 chunk_size,
374 org_pe_start, pe_off,
375 org_virt_sector);
376 #endif
377
378 iobuf = lv_snap->lv_iobuf;
379
380 blksize_org = lvm_get_blksize(org_phys_dev);
381 blksize_snap = lvm_get_blksize(snap_phys_dev);
382 max_blksize = max(blksize_org, blksize_snap);
383 min_blksize = min(blksize_org, blksize_snap);
384 max_sectors = KIO_MAX_SECTORS * (min_blksize>>9);
385
386 if (chunk_size % (max_blksize>>9))
387 goto fail_blksize;
388
389 while (chunk_size)
390 {
391 nr_sectors = min(chunk_size, max_sectors);
392 chunk_size -= nr_sectors;
393
394 iobuf->length = nr_sectors << 9;
395
396 lvm_snapshot_prepare_blocks(blocks, org_start,
397 nr_sectors, blksize_org);
398 if (brw_kiovec(READ, 1, &iobuf, org_phys_dev,
399 blocks, blksize_org) != (nr_sectors<<9))
400 goto fail_raw_read;
401
402 lvm_snapshot_prepare_blocks(blocks, snap_start,
403 nr_sectors, blksize_snap);
404 if (brw_kiovec(WRITE, 1, &iobuf, snap_phys_dev,
405 blocks, blksize_snap) != (nr_sectors<<9))
406 goto fail_raw_write;
407 }
408
409 #ifdef DEBUG_SNAPSHOT
410 /* invalidate the logical snapshot buffer cache */
411 invalidate_snap_cache(virt_start, lv_snap->lv_chunk_size,
412 lv_snap->lv_dev);
413 #endif
414
415 /* the original chunk is now stored on the snapshot volume
416 so update the execption table */
417 lv_snap->lv_block_exception[idx].rdev_org = org_phys_dev;
418 lv_snap->lv_block_exception[idx].rsector_org = org_start;
419
420 lvm_hash_link(lv_snap->lv_block_exception + idx,
421 org_phys_dev, org_start, lv_snap);
422 lv_snap->lv_remap_ptr = idx + 1;
423 if (lv_snap->lv_snapshot_use_rate > 0) {
424 if (lv_snap->lv_remap_ptr * 100 / lv_snap->lv_remap_end >= lv_snap->lv_snapshot_use_rate)
425 wake_up_interruptible(&lv_snap->lv_snapshot_wait);
426 }
427 return 0;
428
429 /* slow path */
430 out:
431 lvm_drop_snapshot(lv_snap, reason);
432 return 1;
433
434 fail_out_of_space:
435 reason = "out of space";
436 goto out;
437 fail_raw_read:
438 reason = "read error";
439 goto out;
440 fail_raw_write:
441 reason = "write error";
442 goto out;
443 fail_blksize:
444 reason = "blocksize error";
445 goto out;
446 }
447
448 int lvm_snapshot_alloc_iobuf_pages(struct kiobuf * iobuf, int sectors)
449 {
450 int bytes, nr_pages, err, i;
451
452 bytes = sectors << 9;
453 nr_pages = (bytes + ~PAGE_MASK) >> PAGE_SHIFT;
454 err = expand_kiobuf(iobuf, nr_pages);
455 if (err)
456 goto out;
457
458 err = -ENOMEM;
459 iobuf->locked = 0;
460 iobuf->nr_pages = 0;
461 for (i = 0; i < nr_pages; i++)
462 {
463 struct page * page;
464
465 page = alloc_page(GFP_KERNEL);
466 if (!page)
467 goto out;
468
469 iobuf->maplist[i] = page;
470 iobuf->nr_pages++;
471 }
472 iobuf->offset = 0;
473
474 err = 0;
475 out:
476 return err;
477 }
478
479 static int calc_max_buckets(void)
480 {
481 unsigned long mem;
482
483 mem = num_physpages << PAGE_SHIFT;
484 mem /= 100;
485 mem *= 2;
486 mem /= sizeof(struct list_head);
487
488 return mem;
489 }
490
491 int lvm_snapshot_alloc_hash_table(lv_t * lv)
492 {
493 int err;
494 unsigned long buckets, max_buckets, size;
495 struct list_head * hash;
496
497 buckets = lv->lv_remap_end;
498 max_buckets = calc_max_buckets();
499 buckets = min(buckets, max_buckets);
500 while (buckets & (buckets-1))
501 buckets &= (buckets-1);
502
503 size = buckets * sizeof(struct list_head);
504
505 err = -ENOMEM;
506 hash = vmalloc(size);
507 lv->lv_snapshot_hash_table = hash;
508
509 if (!hash)
510 goto out;
511 lv->lv_snapshot_hash_table_size = size;
512
513 lv->lv_snapshot_hash_mask = buckets-1;
514 while (buckets--)
515 INIT_LIST_HEAD(hash+buckets);
516 err = 0;
517 out:
518 return err;
519 }
520
521 int lvm_snapshot_alloc(lv_t * lv_snap)
522 {
523 int err, blocksize, max_sectors;
524
525 err = alloc_kiovec(1, &lv_snap->lv_iobuf);
526 if (err)
527 goto out;
528
529 blocksize = lvm_blocksizes[MINOR(lv_snap->lv_dev)];
530 max_sectors = KIO_MAX_SECTORS << (PAGE_SHIFT-9);
531
532 err = lvm_snapshot_alloc_iobuf_pages(lv_snap->lv_iobuf, max_sectors);
533 if (err)
534 goto out_free_kiovec;
535
536 err = lvm_snapshot_alloc_hash_table(lv_snap);
537 if (err)
538 goto out_free_kiovec;
539
540
541 lv_snap->lv_COW_table_page = alloc_page(GFP_KERNEL);
542 if (!lv_snap->lv_COW_table_page)
543 goto out_free_kiovec;
544
545 out:
546 return err;
547
548 out_free_kiovec:
549 unmap_kiobuf(lv_snap->lv_iobuf);
550 free_kiovec(1, &lv_snap->lv_iobuf);
551 vfree(lv_snap->lv_snapshot_hash_table);
552 lv_snap->lv_snapshot_hash_table = NULL;
553 goto out;
554 }
555
556 void lvm_snapshot_release(lv_t * lv)
557 {
558 if (lv->lv_block_exception)
559 {
560 vfree(lv->lv_block_exception);
561 lv->lv_block_exception = NULL;
562 }
563 if (lv->lv_snapshot_hash_table)
564 {
565 vfree(lv->lv_snapshot_hash_table);
566 lv->lv_snapshot_hash_table = NULL;
567 lv->lv_snapshot_hash_table_size = 0;
568 }
569 if (lv->lv_iobuf)
570 {
571 unmap_kiobuf(lv->lv_iobuf);
572 free_kiovec(1, &lv->lv_iobuf);
573 lv->lv_iobuf = NULL;
574 }
575 if (lv->lv_COW_table_page)
576 {
577 free_page((ulong)lv->lv_COW_table_page);
578 lv->lv_COW_table_page = NULL;
579 }
580 }
581
/*
 * This page was automatically generated by the LXR engine.
 * Visit the LXR main site for more information.
 */