1 /*
2 * linux/fs/block_dev.c
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 */
6
7 #include <linux/config.h>
8 #include <linux/init.h>
9 #include <linux/mm.h>
10 #include <linux/locks.h>
11 #include <linux/fcntl.h>
12 #include <linux/malloc.h>
13 #include <linux/kmod.h>
14 #include <linux/devfs_fs_kernel.h>
15 #include <linux/smp_lock.h>
16
17 #include <asm/uaccess.h>
18
/*
 * Per-device tables defined elsewhere, indexed [major][minor].
 * As used in this file: a blk_size[][] entry shifted left by
 * BLOCK_SIZE_BITS yields the device size in bytes, and a non-zero
 * blksize_size[][] entry overrides the default BLOCK_SIZE block size.
 * A NULL row means no table is available for that major.
 */
extern int *blk_size[];
extern int *blksize_size[];

#define MAX_BUF_PER_PAGE (PAGE_SIZE / 512)
/* Capacity of the on-stack buffer_head arrays in block_read()/block_write(). */
#define NBUF 64
24
25 ssize_t block_write(struct file * filp, const char * buf,
26 size_t count, loff_t *ppos)
27 {
28 struct inode * inode = filp->f_dentry->d_inode;
29 ssize_t blocksize, blocksize_bits, i, buffercount, write_error;
30 ssize_t block, blocks;
31 loff_t offset;
32 ssize_t chars;
33 ssize_t written;
34 struct buffer_head * bhlist[NBUF];
35 size_t size;
36 kdev_t dev = inode->i_rdev;
37 struct buffer_head * bh, *bufferlist[NBUF];
38 register char * p;
39
40 if (is_read_only(dev))
41 return -EPERM;
42
43 written = write_error = buffercount = 0;
44 blocksize = BLOCK_SIZE;
45 if (blksize_size[MAJOR(dev)] && blksize_size[MAJOR(dev)][MINOR(dev)])
46 blocksize = blksize_size[MAJOR(dev)][MINOR(dev)];
47
48 i = blocksize;
49 blocksize_bits = 0;
50 while(i != 1) {
51 blocksize_bits++;
52 i >>= 1;
53 }
54
55 block = *ppos >> blocksize_bits;
56 offset = *ppos & (blocksize-1);
57
58 if (blk_size[MAJOR(dev)])
59 size = ((loff_t) blk_size[MAJOR(dev)][MINOR(dev)] << BLOCK_SIZE_BITS) >> blocksize_bits;
60 else
61 size = INT_MAX;
62 while (count>0) {
63 if (block >= size)
64 return written ? written : -ENOSPC;
65 chars = blocksize - offset;
66 if (chars > count)
67 chars=count;
68
69 #if 0
70 /* get the buffer head */
71 {
72 struct buffer_head * (*fn)(kdev_t, int, int) = getblk;
73 if (chars != blocksize)
74 fn = bread;
75 bh = fn(dev, block, blocksize);
76 if (!bh)
77 return written ? written : -EIO;
78 if (!buffer_uptodate(bh))
79 wait_on_buffer(bh);
80 }
81 #else
82 bh = getblk(dev, block, blocksize);
83 if (!bh)
84 return written ? written : -EIO;
85
86 if (!buffer_uptodate(bh))
87 {
88 if (chars == blocksize)
89 wait_on_buffer(bh);
90 else
91 {
92 bhlist[0] = bh;
93 if (!filp->f_reada || !read_ahead[MAJOR(dev)]) {
94 /* We do this to force the read of a single buffer */
95 blocks = 1;
96 } else {
97 /* Read-ahead before write */
98 blocks = read_ahead[MAJOR(dev)] / (blocksize >> 9) / 2;
99 if (block + blocks > size) blocks = size - block;
100 if (blocks > NBUF) blocks=NBUF;
101 if (!blocks) blocks = 1;
102 for(i=1; i<blocks; i++)
103 {
104 bhlist[i] = getblk (dev, block+i, blocksize);
105 if (!bhlist[i])
106 {
107 while(i >= 0) brelse(bhlist[i--]);
108 return written ? written : -EIO;
109 }
110 }
111 }
112 ll_rw_block(READ, blocks, bhlist);
113 for(i=1; i<blocks; i++) brelse(bhlist[i]);
114 wait_on_buffer(bh);
115 if (!buffer_uptodate(bh)) {
116 brelse(bh);
117 return written ? written : -EIO;
118 }
119 };
120 };
121 #endif
122 block++;
123 p = offset + bh->b_data;
124 offset = 0;
125 *ppos += chars;
126 written += chars;
127 count -= chars;
128 copy_from_user(p,buf,chars);
129 p += chars;
130 buf += chars;
131 mark_buffer_uptodate(bh, 1);
132 mark_buffer_dirty(bh);
133 if (filp->f_flags & O_SYNC)
134 bufferlist[buffercount++] = bh;
135 else
136 brelse(bh);
137 if (buffercount == NBUF){
138 ll_rw_block(WRITE, buffercount, bufferlist);
139 for(i=0; i<buffercount; i++){
140 wait_on_buffer(bufferlist[i]);
141 if (!buffer_uptodate(bufferlist[i]))
142 write_error=1;
143 brelse(bufferlist[i]);
144 }
145 buffercount=0;
146 }
147 balance_dirty(dev);
148 if (write_error)
149 break;
150 }
151 if ( buffercount ){
152 ll_rw_block(WRITE, buffercount, bufferlist);
153 for(i=0; i<buffercount; i++){
154 wait_on_buffer(bufferlist[i]);
155 if (!buffer_uptodate(bufferlist[i]))
156 write_error=1;
157 brelse(bufferlist[i]);
158 }
159 }
160 filp->f_reada = 1;
161 if(write_error)
162 return -EIO;
163 return written;
164 }
165
166 ssize_t block_read(struct file * filp, char * buf, size_t count, loff_t *ppos)
167 {
168 struct inode * inode = filp->f_dentry->d_inode;
169 size_t block;
170 loff_t offset;
171 ssize_t blocksize;
172 ssize_t blocksize_bits, i;
173 size_t blocks, rblocks, left;
174 int bhrequest, uptodate;
175 struct buffer_head ** bhb, ** bhe;
176 struct buffer_head * buflist[NBUF];
177 struct buffer_head * bhreq[NBUF];
178 unsigned int chars;
179 loff_t size;
180 kdev_t dev;
181 ssize_t read;
182
183 dev = inode->i_rdev;
184 blocksize = BLOCK_SIZE;
185 if (blksize_size[MAJOR(dev)] && blksize_size[MAJOR(dev)][MINOR(dev)])
186 blocksize = blksize_size[MAJOR(dev)][MINOR(dev)];
187 i = blocksize;
188 blocksize_bits = 0;
189 while (i != 1) {
190 blocksize_bits++;
191 i >>= 1;
192 }
193
194 offset = *ppos;
195 if (blk_size[MAJOR(dev)])
196 size = (loff_t) blk_size[MAJOR(dev)][MINOR(dev)] << BLOCK_SIZE_BITS;
197 else
198 size = (loff_t) INT_MAX << BLOCK_SIZE_BITS;
199
200 if (offset > size)
201 left = 0;
202 /* size - offset might not fit into left, so check explicitly. */
203 else if (size - offset > INT_MAX)
204 left = INT_MAX;
205 else
206 left = size - offset;
207 if (left > count)
208 left = count;
209 if (left <= 0)
210 return 0;
211 read = 0;
212 block = offset >> blocksize_bits;
213 offset &= blocksize-1;
214 size >>= blocksize_bits;
215 rblocks = blocks = (left + offset + blocksize - 1) >> blocksize_bits;
216 bhb = bhe = buflist;
217 if (filp->f_reada) {
218 if (blocks < read_ahead[MAJOR(dev)] / (blocksize >> 9))
219 blocks = read_ahead[MAJOR(dev)] / (blocksize >> 9);
220 if (rblocks > blocks)
221 blocks = rblocks;
222
223 }
224 if (block + blocks > size) {
225 blocks = size - block;
226 if (blocks == 0)
227 return 0;
228 }
229
230 /* We do this in a two stage process. We first try to request
231 as many blocks as we can, then we wait for the first one to
232 complete, and then we try to wrap up as many as are actually
233 done. This routine is rather generic, in that it can be used
234 in a filesystem by substituting the appropriate function in
235 for getblk.
236
237 This routine is optimized to make maximum use of the various
238 buffers and caches. */
239
240 do {
241 bhrequest = 0;
242 uptodate = 1;
243 while (blocks) {
244 --blocks;
245 *bhb = getblk(dev, block++, blocksize);
246 if (*bhb && !buffer_uptodate(*bhb)) {
247 uptodate = 0;
248 bhreq[bhrequest++] = *bhb;
249 }
250
251 if (++bhb == &buflist[NBUF])
252 bhb = buflist;
253
254 /* If the block we have on hand is uptodate, go ahead
255 and complete processing. */
256 if (uptodate)
257 break;
258 if (bhb == bhe)
259 break;
260 }
261
262 /* Now request them all */
263 if (bhrequest) {
264 ll_rw_block(READ, bhrequest, bhreq);
265 }
266
267 do { /* Finish off all I/O that has actually completed */
268 if (*bhe) {
269 wait_on_buffer(*bhe);
270 if (!buffer_uptodate(*bhe)) { /* read error? */
271 brelse(*bhe);
272 if (++bhe == &buflist[NBUF])
273 bhe = buflist;
274 left = 0;
275 break;
276 }
277 }
278 if (left < blocksize - offset)
279 chars = left;
280 else
281 chars = blocksize - offset;
282 *ppos += chars;
283 left -= chars;
284 read += chars;
285 if (*bhe) {
286 copy_to_user(buf,offset+(*bhe)->b_data,chars);
287 brelse(*bhe);
288 buf += chars;
289 } else {
290 while (chars-- > 0)
291 put_user(0,buf++);
292 }
293 offset = 0;
294 if (++bhe == &buflist[NBUF])
295 bhe = buflist;
296 } while (left > 0 && bhe != bhb && (!*bhe || !buffer_locked(*bhe)));
297 if (bhe == bhb && !blocks)
298 break;
299 } while (left > 0);
300
301 /* Release the read-ahead blocks */
302 while (bhe != bhb) {
303 brelse(*bhe);
304 if (++bhe == &buflist[NBUF])
305 bhe = buflist;
306 };
307 if (!read)
308 return -EIO;
309 filp->f_reada = 1;
310 return read;
311 }
312
313 /*
314 * private llseek:
315 * for a block special file file->f_dentry->d_inode->i_size is zero
316 * so we compute the size by hand (just as in block_read/write above)
317 */
318 static loff_t block_llseek(struct file *file, loff_t offset, int origin)
319 {
320 long long retval;
321 kdev_t dev;
322
323 switch (origin) {
324 case 2:
325 dev = file->f_dentry->d_inode->i_rdev;
326 if (blk_size[MAJOR(dev)])
327 offset += (loff_t) blk_size[MAJOR(dev)][MINOR(dev)] << BLOCK_SIZE_BITS;
328 /* else? return -EINVAL? */
329 break;
330 case 1:
331 offset += file->f_pos;
332 }
333 retval = -EINVAL;
334 if (offset >= 0) {
335 if (offset != file->f_pos) {
336 file->f_pos = offset;
337 file->f_reada = 0;
338 file->f_version = ++event;
339 }
340 retval = offset;
341 }
342 return retval;
343 }
344
345
346 /*
347 * Filp may be NULL when we are called by an msync of a vma
348 * since the vma has no handle.
349 */
350
351 static int block_fsync(struct file *filp, struct dentry *dentry, int datasync)
352 {
353 return fsync_dev(dentry->d_inode->i_rdev);
354 }
355
356 /*
357 * bdev cache handling - shamelessly stolen from inode.c
358 * We use smaller hashtable, though.
359 */
360
361 #define HASH_BITS 6
362 #define HASH_SIZE (1UL << HASH_BITS)
363 #define HASH_MASK (HASH_SIZE-1)
364 static struct list_head bdev_hashtable[HASH_SIZE];
365 static spinlock_t bdev_lock = SPIN_LOCK_UNLOCKED;
366 static kmem_cache_t * bdev_cachep;
367
368 #define alloc_bdev() \
369 ((struct block_device *) kmem_cache_alloc(bdev_cachep, SLAB_KERNEL))
370 #define destroy_bdev(bdev) kmem_cache_free(bdev_cachep, (bdev))
371
372 static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
373 {
374 struct block_device * bdev = (struct block_device *) foo;
375
376 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
377 SLAB_CTOR_CONSTRUCTOR)
378 {
379 memset(bdev, 0, sizeof(*bdev));
380 sema_init(&bdev->bd_sem, 1);
381 }
382 }
383
384 void __init bdev_init(void)
385 {
386 int i;
387 struct list_head *head = bdev_hashtable;
388
389 i = HASH_SIZE;
390 do {
391 INIT_LIST_HEAD(head);
392 head++;
393 i--;
394 } while (i);
395
396 bdev_cachep = kmem_cache_create("bdev_cache",
397 sizeof(struct block_device),
398 0, SLAB_HWCACHE_ALIGN, init_once,
399 NULL);
400 if (!bdev_cachep)
401 panic("Cannot create bdev_cache SLAB cache");
402 }
403
404 /*
405 * Most likely _very_ bad one - but then it's hardly critical for small
406 * /dev and can be fixed when somebody will need really large one.
407 */
408 static inline unsigned long hash(dev_t dev)
409 {
410 unsigned long tmp = dev;
411 tmp = tmp + (tmp >> HASH_BITS) + (tmp >> HASH_BITS*2);
412 return tmp & HASH_MASK;
413 }
414
415 static struct block_device *bdfind(dev_t dev, struct list_head *head)
416 {
417 struct list_head *p;
418 struct block_device *bdev;
419 for (p=head->next; p!=head; p=p->next) {
420 bdev = list_entry(p, struct block_device, bd_hash);
421 if (bdev->bd_dev != dev)
422 continue;
423 atomic_inc(&bdev->bd_count);
424 return bdev;
425 }
426 return NULL;
427 }
428
429 struct block_device *bdget(dev_t dev)
430 {
431 struct list_head * head = bdev_hashtable + hash(dev);
432 struct block_device *bdev, *new_bdev;
433 spin_lock(&bdev_lock);
434 bdev = bdfind(dev, head);
435 spin_unlock(&bdev_lock);
436 if (bdev)
437 return bdev;
438 new_bdev = alloc_bdev();
439 if (!new_bdev)
440 return NULL;
441 atomic_set(&new_bdev->bd_count,1);
442 new_bdev->bd_dev = dev;
443 new_bdev->bd_op = NULL;
444 spin_lock(&bdev_lock);
445 bdev = bdfind(dev, head);
446 if (!bdev) {
447 list_add(&new_bdev->bd_hash, head);
448 spin_unlock(&bdev_lock);
449 return new_bdev;
450 }
451 spin_unlock(&bdev_lock);
452 destroy_bdev(new_bdev);
453 return bdev;
454 }
455
456 void bdput(struct block_device *bdev)
457 {
458 if (atomic_dec_and_test(&bdev->bd_count)) {
459 spin_lock(&bdev_lock);
460 if (atomic_read(&bdev->bd_openers))
461 BUG();
462 list_del(&bdev->bd_hash);
463 spin_unlock(&bdev_lock);
464 destroy_bdev(bdev);
465 }
466 }
467
/*
 * Table of registered block drivers, indexed by major number.  A slot is
 * in use when its bdops pointer is non-NULL (see register_blkdev() and
 * unregister_blkdev()).
 */
static struct {
	const char *name;	/* driver name as passed to register_blkdev() */
	struct block_device_operations *bdops;	/* driver operations, NULL if free */
} blkdevs[MAX_BLKDEV];
472
473 int get_blkdev_list(char * p)
474 {
475 int i;
476 int len;
477
478 len = sprintf(p, "\nBlock devices:\n");
479 for (i = 0; i < MAX_BLKDEV ; i++) {
480 if (blkdevs[i].bdops) {
481 len += sprintf(p+len, "%3d %s\n", i, blkdevs[i].name);
482 }
483 }
484 return len;
485 }
486
487 /*
488 Return the function table of a device.
489 Load the driver if needed.
490 */
491 const struct block_device_operations * get_blkfops(unsigned int major)
492 {
493 const struct block_device_operations *ret = NULL;
494
495 /* major 0 is used for non-device mounts */
496 if (major && major < MAX_BLKDEV) {
497 #ifdef CONFIG_KMOD
498 if (!blkdevs[major].bdops) {
499 char name[20];
500 sprintf(name, "block-major-%d", major);
501 request_module(name);
502 }
503 #endif
504 ret = blkdevs[major].bdops;
505 }
506 return ret;
507 }
508
509 int register_blkdev(unsigned int major, const char * name, struct block_device_operations *bdops)
510 {
511 if (major == 0) {
512 for (major = MAX_BLKDEV-1; major > 0; major--) {
513 if (blkdevs[major].bdops == NULL) {
514 blkdevs[major].name = name;
515 blkdevs[major].bdops = bdops;
516 return major;
517 }
518 }
519 return -EBUSY;
520 }
521 if (major >= MAX_BLKDEV)
522 return -EINVAL;
523 if (blkdevs[major].bdops && blkdevs[major].bdops != bdops)
524 return -EBUSY;
525 blkdevs[major].name = name;
526 blkdevs[major].bdops = bdops;
527 return 0;
528 }
529
530 int unregister_blkdev(unsigned int major, const char * name)
531 {
532 if (major >= MAX_BLKDEV)
533 return -EINVAL;
534 if (!blkdevs[major].bdops)
535 return -EINVAL;
536 if (strcmp(blkdevs[major].name, name))
537 return -EINVAL;
538 blkdevs[major].name = NULL;
539 blkdevs[major].bdops = NULL;
540 return 0;
541 }
542
543 /*
544 * This routine checks whether a removable media has been changed,
545 * and invalidates all buffer-cache-entries in that case. This
546 * is a relatively slow routine, so we have to try to minimize using
547 * it. Thus it is called only upon a 'mount' or 'open'. This
548 * is the best way of combining speed and utility, I think.
549 * People changing diskettes in the middle of an operation deserve
550 * to lose :-)
551 */
552 int check_disk_change(kdev_t dev)
553 {
554 int i;
555 const struct block_device_operations * bdops = NULL;
556 struct super_block * sb;
557
558 i = MAJOR(dev);
559 if (i < MAX_BLKDEV)
560 bdops = blkdevs[i].bdops;
561 if (bdops == NULL) {
562 devfs_handle_t de;
563
564 de = devfs_find_handle (NULL, NULL, i, MINOR (dev),
565 DEVFS_SPECIAL_BLK, 0);
566 if (de) bdops = devfs_get_ops (de);
567 }
568 if (bdops == NULL)
569 return 0;
570 if (bdops->check_media_change == NULL)
571 return 0;
572 if (!bdops->check_media_change(dev))
573 return 0;
574
575 printk(KERN_DEBUG "VFS: Disk change detected on device %s\n",
576 bdevname(dev));
577
578 sb = get_super(dev);
579 if (sb && invalidate_inodes(sb))
580 printk("VFS: busy inodes on changed media.\n");
581
582 destroy_buffers(dev);
583
584 if (bdops->revalidate)
585 bdops->revalidate(dev);
586 return 1;
587 }
588
589 int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
590 {
591 kdev_t rdev = to_kdev_t(bdev->bd_dev);
592 struct inode inode_fake;
593 int res;
594 mm_segment_t old_fs = get_fs();
595
596 if (!bdev->bd_op->ioctl)
597 return -EINVAL;
598 inode_fake.i_rdev=rdev;
599 init_waitqueue_head(&inode_fake.i_wait);
600 set_fs(KERNEL_DS);
601 res = bdev->bd_op->ioctl(&inode_fake, NULL, cmd, arg);
602 set_fs(old_fs);
603 return res;
604 }
605
606 int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags, int kind)
607 {
608 int ret = -ENODEV;
609 kdev_t rdev = to_kdev_t(bdev->bd_dev); /* this should become bdev */
610 down(&bdev->bd_sem);
611 if (!bdev->bd_op)
612 bdev->bd_op = get_blkfops(MAJOR(rdev));
613 if (bdev->bd_op) {
614 /*
615 * This crockload is due to bad choice of ->open() type.
616 * It will go away.
617 * For now, block device ->open() routine must _not_
618 * examine anything in 'inode' argument except ->i_rdev.
619 */
620 struct file fake_file = {};
621 struct dentry fake_dentry = {};
622 struct inode *fake_inode = get_empty_inode();
623 ret = -ENOMEM;
624 if (fake_inode) {
625 fake_file.f_mode = mode;
626 fake_file.f_flags = flags;
627 fake_file.f_dentry = &fake_dentry;
628 fake_dentry.d_inode = fake_inode;
629 fake_inode->i_rdev = rdev;
630 ret = 0;
631 if (bdev->bd_op->open)
632 ret = bdev->bd_op->open(fake_inode, &fake_file);
633 if (!ret)
634 atomic_inc(&bdev->bd_openers);
635 else if (!atomic_read(&bdev->bd_openers))
636 bdev->bd_op = NULL;
637 iput(fake_inode);
638 }
639 }
640 up(&bdev->bd_sem);
641 return ret;
642 }
643
644 int blkdev_open(struct inode * inode, struct file * filp)
645 {
646 int ret = -ENXIO;
647 struct block_device *bdev = inode->i_bdev;
648 down(&bdev->bd_sem);
649 lock_kernel();
650 if (!bdev->bd_op)
651 bdev->bd_op = get_blkfops(MAJOR(inode->i_rdev));
652 if (bdev->bd_op) {
653 ret = 0;
654 if (bdev->bd_op->open)
655 ret = bdev->bd_op->open(inode,filp);
656 if (!ret)
657 atomic_inc(&bdev->bd_openers);
658 else if (!atomic_read(&bdev->bd_openers))
659 bdev->bd_op = NULL;
660 }
661 unlock_kernel();
662 up(&bdev->bd_sem);
663 return ret;
664 }
665
666 int blkdev_put(struct block_device *bdev, int kind)
667 {
668 int ret = 0;
669 kdev_t rdev = to_kdev_t(bdev->bd_dev); /* this should become bdev */
670 down(&bdev->bd_sem);
671 /* syncing will go here */
672 lock_kernel();
673 if (kind == BDEV_FILE || kind == BDEV_FS)
674 fsync_dev(rdev);
675 if (atomic_dec_and_test(&bdev->bd_openers)) {
676 /* invalidating buffers will go here */
677 invalidate_buffers(rdev);
678 }
679 if (bdev->bd_op->release) {
680 struct inode * fake_inode = get_empty_inode();
681 ret = -ENOMEM;
682 if (fake_inode) {
683 fake_inode->i_rdev = rdev;
684 ret = bdev->bd_op->release(fake_inode, NULL);
685 iput(fake_inode);
686 }
687 }
688 if (!atomic_read(&bdev->bd_openers))
689 bdev->bd_op = NULL; /* we can't rely on driver being */
690 /* kind to stay around. */
691 unlock_kernel();
692 up(&bdev->bd_sem);
693 return ret;
694 }
695
696 static int blkdev_close(struct inode * inode, struct file * filp)
697 {
698 return blkdev_put(inode->i_bdev, BDEV_FILE);
699 }
700
701 static int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd,
702 unsigned long arg)
703 {
704 if (inode->i_bdev->bd_op->ioctl)
705 return inode->i_bdev->bd_op->ioctl(inode, file, cmd, arg);
706 return -EINVAL;
707 }
708
709 struct file_operations def_blk_fops = {
710 open: blkdev_open,
711 release: blkdev_close,
712 llseek: block_llseek,
713 read: block_read,
714 write: block_write,
715 fsync: block_fsync,
716 ioctl: blkdev_ioctl,
717 };
718
719 const char * bdevname(kdev_t dev)
720 {
721 static char buffer[32];
722 const char * name = blkdevs[MAJOR(dev)].name;
723
724 if (!name)
725 name = "unknown-block";
726
727 sprintf(buffer, "%s(%d,%d)", name, MAJOR(dev), MINOR(dev));
728 return buffer;
729 }
730
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more
information.