~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

Linux Cross Reference
Linux/drivers/block/ll_rw_blk.c

Version: ~ [ 2.4.0 ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /*
  2  *  linux/drivers/block/ll_rw_blk.c
  3  *
  4  * Copyright (C) 1991, 1992 Linus Torvalds
  5  * Copyright (C) 1994,      Karl Keyte: Added support for disk statistics
  6  * Elevator latency, (C) 2000  Andrea Arcangeli <andrea@suse.de> SuSE
  7  * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de>
  8  * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> -  July2000
  9  */
 10 
 11 /*
 12  * This handles all read/write requests to block devices
 13  */
 14 #include <linux/sched.h>
 15 #include <linux/kernel.h>
 16 #include <linux/kernel_stat.h>
 17 #include <linux/errno.h>
 18 #include <linux/string.h>
 19 #include <linux/config.h>
 20 #include <linux/locks.h>
 21 #include <linux/mm.h>
 22 #include <linux/init.h>
 23 #include <linux/smp_lock.h>
 24 
 25 #include <asm/system.h>
 26 #include <asm/io.h>
 27 #include <linux/blk.h>
 28 #include <linux/highmem.h>
 29 #include <linux/raid/md.h>
 30 
 31 #include <linux/module.h>
 32 
 33 /*
 34  * MAC Floppy IWM hooks
 35  */
 36 
 37 #ifdef CONFIG_MAC_FLOPPY_IWM
 38 extern int mac_floppy_init(void);
 39 #endif
 40 
 41 extern int lvm_init(void);
 42 
 43 /*
 44  * For the allocated request tables
 45  */
 46 static kmem_cache_t *request_cachep;
 47 
 48 /*
 49  * The "disk" task queue is used to start the actual requests
 50  * after a plug
 51  */
 52 DECLARE_TASK_QUEUE(tq_disk);
 53 
 54 /*
 55  * Protect the request list against multiple users..
 56  *
 57  * With this spinlock the Linux block IO subsystem is 100% SMP threaded
 58  * from the IRQ event side, and almost 100% SMP threaded from the syscall
 59  * side (we still have to protect against block device array operations, and
 60  * the do_request() side is casually still unsafe. The kernel lock protects
 61  * this part currently.).
 62  *
 63  * there is a fair chance that things will work just OK if these functions
 64  * are called with no global kernel lock held ...
 65  */
 66 spinlock_t io_request_lock = SPIN_LOCK_UNLOCKED;
 67 
 68 /* This specifies how many sectors to read ahead on the disk. */
 69 
 70 int read_ahead[MAX_BLKDEV];
 71 
 72 /* blk_dev_struct is:
 73  *      *request_fn
 74  *      *current_request
 75  */
 76 struct blk_dev_struct blk_dev[MAX_BLKDEV]; /* initialized by blk_dev_init() */
 77 
 78 /*
 79  * blk_size contains the size of all block-devices in units of 1024 byte
 80  * sectors:
 81  *
 82  * blk_size[MAJOR][MINOR]
 83  *
 84  * if (!blk_size[MAJOR]) then no minor size checking is done.
 85  */
 86 int * blk_size[MAX_BLKDEV];
 87 
 88 /*
 89  * blksize_size contains the size of all block-devices:
 90  *
 91  * blksize_size[MAJOR][MINOR]
 92  *
 93  * if (!blksize_size[MAJOR]) then 1024 bytes is assumed.
 94  */
 95 int * blksize_size[MAX_BLKDEV];
 96 
 97 /*
 98  * hardsect_size contains the size of the hardware sector of a device.
 99  *
100  * hardsect_size[MAJOR][MINOR]
101  *
102  * if (!hardsect_size[MAJOR])
103  *              then 512 bytes is assumed.
104  * else
105  *              sector_size is hardsect_size[MAJOR][MINOR]
106  * This is currently set by some scsi devices and read by the msdos fs driver.
107  * Other uses may appear later.
108  */
109 int * hardsect_size[MAX_BLKDEV];
110 
111 /*
112  * The following tunes the read-ahead algorithm in mm/filemap.c
113  */
114 int * max_readahead[MAX_BLKDEV];
115 
116 /*
117  * Max number of sectors per request
118  */
119 int * max_sectors[MAX_BLKDEV];
120 
121 static inline int get_max_sectors(kdev_t dev)
122 {
123         if (!max_sectors[MAJOR(dev)])
124                 return MAX_SECTORS;
125         return max_sectors[MAJOR(dev)][MINOR(dev)];
126 }
127 
128 static inline request_queue_t *__blk_get_queue(kdev_t dev)
129 {
130         struct blk_dev_struct *bdev = blk_dev + MAJOR(dev);
131 
132         if (bdev->queue)
133                 return bdev->queue(dev);
134         else
135                 return &blk_dev[MAJOR(dev)].request_queue;
136 }
137 
138 /*
139  * NOTE: the device-specific queue() functions
140  * have to be atomic!
141  */
142 request_queue_t *blk_get_queue(kdev_t dev)
143 {
144         request_queue_t *ret;
145         unsigned long flags;
146 
147         spin_lock_irqsave(&io_request_lock,flags);
148         ret = __blk_get_queue(dev);
149         spin_unlock_irqrestore(&io_request_lock,flags);
150 
151         return ret;
152 }
153 
154 static int __blk_cleanup_queue(struct list_head *head)
155 {
156         struct list_head *entry;
157         struct request *rq;
158         int i = 0;
159 
160         if (list_empty(head))
161                 return 0;
162 
163         entry = head->next;
164         do {
165                 rq = list_entry(entry, struct request, table);
166                 entry = entry->next;
167                 list_del(&rq->table);
168                 kmem_cache_free(request_cachep, rq);
169                 i++;
170         } while (!list_empty(head));
171 
172         return i;
173 }
174 
175 /**
176  * blk_cleanup_queue: - release a &request_queue_t when it is no longer needed
177  * @q:    the request queue to be released
178  *
179  * Description:
180  *     blk_cleanup_queue is the pair to blk_init_queue().  It should
181  *     be called when a request queue is being released; typically
182  *     when a block device is being de-registered.  Currently, its
183  *     primary task it to free all the &struct request structures that
184  *     were allocated to the queue.
185  * Caveat: 
186  *     Hopefully the low level driver will have finished any
187  *     outstanding requests first...
188  **/
189 void blk_cleanup_queue(request_queue_t * q)
190 {
191         int count = QUEUE_NR_REQUESTS;
192 
193         count -= __blk_cleanup_queue(&q->request_freelist[READ]);
194         count -= __blk_cleanup_queue(&q->request_freelist[WRITE]);
195 
196         if (count)
197                 printk("blk_cleanup_queue: leaked requests (%d)\n", count);
198 
199         memset(q, 0, sizeof(*q));
200 }
201 
202 /**
203  * blk_queue_headactive - indicate whether head of request queue may be active
204  * @q:       The queue which this applies to.
205  * @active:  A flag indication where the head of the queue is active.
206  *
207  * Description:
208  *    The driver for a block device may choose to leave the currently active
209  *    request on the request queue, removing it only when it has completed.
210  *    The queue handling routines assume this by default for safety reasons
211  *    and will not involve the head of the request queue in any merging or
212  *    reordering of requests when the queue is unplugged (and thus may be
213  *    working on this particular request).
214  *
215  *    If a driver removes requests from the queue before processing them, then
216  *    it may indicate that it does so, there by allowing the head of the queue
217  *    to be involved in merging and reordering.  This is done be calling
218  *    blk_queue_headactive() with an @active flag of %0.
219  *
220  *    If a driver processes several requests at once, it must remove them (or
221  *    at least all but one of them) from the request queue.
222  *
223  *    When a queue is plugged (see blk_queue_pluggable()) the head will be
224  *    assumed to be inactive.
225  **/
226  
227 void blk_queue_headactive(request_queue_t * q, int active)
228 {
229         q->head_active = active;
230 }
231 
232 /**
233  * blk_queue_pluggable - define a plugging function for a request queue
234  * @q:   the request queue to which the function will apply
235  * @plug: the function to be called to plug a queue
236  *
237  * Description:
238  *   A request queue will be "plugged" if a request is added to it
239  *   while it is empty.  This allows a number of requests to be added
240  *   before any are processed, thus providing an opportunity for these
241  *   requests to be merged or re-ordered.
242  *   The default plugging function (generic_plug_device()) sets the
243  *   "plugged" flag for the queue and adds a task to the $tq_disk task
244  *   queue to unplug the queue and call the request function at a
245  *   later time.
246  *
247  *   A device driver may provide an alternate plugging function by
248  *   passing it to blk_queue_pluggable().  This function should set
249  *   the "plugged" flag if it want calls to the request_function to be
250  *   blocked, and should place a task on $tq_disk which will unplug
251  *   the queue.  Alternately it can simply do nothing and there-by
252  *   disable plugging of the device.
253  **/
254 
255 void blk_queue_pluggable (request_queue_t * q, plug_device_fn *plug)
256 {
257         q->plug_device_fn = plug;
258 }
259 
260 
261 /**
262  * blk_queue_make_request - define an alternate make_request function for a device
263  * @q:  the request queue for the device to be affected
264  * @mfn: the alternate make_request function
265  *
266  * Description:
267  *    The normal way for &struct buffer_heads to be passed to a device
268  *    driver is for them to be collected into requests on a request
269  *    queue, and then to allow the device driver to select requests
270  *    off that queue when it is ready.  This works well for many block
271  *    devices. However some block devices (typically virtual devices
272  *    such as md or lvm) do not benefit from the processing on the
273  *    request queue, and are served best by having the requests passed
274  *    directly to them.  This can be achieved by providing a function
275  *    to blk_queue_make_request().
276  *
277  * Caveat:
278  *    The driver that does this *must* be able to deal appropriately
279  *    with buffers in "highmemory", either by calling bh_kmap() to get
280  *    a kernel mapping, to by calling create_bounce() to create a
281  *    buffer in normal memory.
282  **/
283 
284 void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
285 {
286         q->make_request_fn = mfn;
287 }
288 
289 static inline int ll_new_segment(request_queue_t *q, struct request *req, int max_segments)
290 {
291         if (req->nr_segments < max_segments) {
292                 req->nr_segments++;
293                 q->elevator.nr_segments++;
294                 return 1;
295         }
296         return 0;
297 }
298 
299 static int ll_back_merge_fn(request_queue_t *q, struct request *req, 
300                             struct buffer_head *bh, int max_segments)
301 {
302         if (req->bhtail->b_data + req->bhtail->b_size == bh->b_data)
303                 return 1;
304         return ll_new_segment(q, req, max_segments);
305 }
306 
307 static int ll_front_merge_fn(request_queue_t *q, struct request *req, 
308                              struct buffer_head *bh, int max_segments)
309 {
310         if (bh->b_data + bh->b_size == req->bh->b_data)
311                 return 1;
312         return ll_new_segment(q, req, max_segments);
313 }
314 
315 static int ll_merge_requests_fn(request_queue_t *q, struct request *req,
316                                 struct request *next, int max_segments)
317 {
318         int total_segments = req->nr_segments + next->nr_segments;
319         int same_segment;
320 
321         same_segment = 0;
322         if (req->bhtail->b_data + req->bhtail->b_size == next->bh->b_data) {
323                 total_segments--;
324                 same_segment = 1;
325         }
326     
327         if (total_segments > max_segments)
328                 return 0;
329 
330         q->elevator.nr_segments -= same_segment;
331         req->nr_segments = total_segments;
332         return 1;
333 }
334 
335 /*
336  * "plug" the device if there are no outstanding requests: this will
337  * force the transfer to start only after we have put all the requests
338  * on the list.
339  *
340  * This is called with interrupts off and no requests on the queue.
341  * (and with the request spinlock acquired)
342  */
343 static void generic_plug_device(request_queue_t *q, kdev_t dev)
344 {
345         /*
346          * no need to replug device
347          */
348         if (!list_empty(&q->queue_head) || q->plugged)
349                 return;
350 
351         q->plugged = 1;
352         queue_task(&q->plug_tq, &tq_disk);
353 }
354 
355 /*
356  * remove the plug and let it rip..
357  */
358 static inline void __generic_unplug_device(request_queue_t *q)
359 {
360         if (q->plugged) {
361                 q->plugged = 0;
362                 if (!list_empty(&q->queue_head))
363                         q->request_fn(q);
364         }
365 }
366 
367 static void generic_unplug_device(void *data)
368 {
369         request_queue_t *q = (request_queue_t *) data;
370         unsigned long flags;
371 
372         spin_lock_irqsave(&io_request_lock, flags);
373         __generic_unplug_device(q);
374         spin_unlock_irqrestore(&io_request_lock, flags);
375 }
376 
377 static void blk_init_free_list(request_queue_t *q)
378 {
379         struct request *rq;
380         int i;
381 
382         /*
383          * Divide requests in half between read and write. This used to
384          * be a 2/3 advantage for reads, but now reads can steal from
385          * the write free list.
386          */
387         for (i = 0; i < QUEUE_NR_REQUESTS; i++) {
388                 rq = kmem_cache_alloc(request_cachep, SLAB_KERNEL);
389                 rq->rq_status = RQ_INACTIVE;
390                 list_add(&rq->table, &q->request_freelist[i & 1]);
391         }
392 
393         init_waitqueue_head(&q->wait_for_request);
394         spin_lock_init(&q->request_lock);
395 }
396 
397 static int __make_request(request_queue_t * q, int rw, struct buffer_head * bh);
398 
399 /**
400  * blk_init_queue  - prepare a request queue for use with a block device
401  * @q:    The &request_queue_t to be initialised
402  * @rfn:  The function to be called to process requests that have been
403  *        placed on the queue.
404  *
405  * Description:
406  *    If a block device wishes to use the standard request handling procedures,
407  *    which sorts requests and coalesces adjacent requests, then it must
408  *    call blk_init_queue().  The function @rfn will be called when there
409  *    are requests on the queue that need to be processed.  If the device
410  *    supports plugging, then @rfn may not be called immediately when requests
411  *    are available on the queue, but may be called at some time later instead.
412  *    Plugged queues are generally unplugged when a buffer belonging to one
413  *    of the requests on the queue is needed, or due to memory pressure.
414  *
415  *    @rfn is not required, or even expected, to remove all requests off the
416  *    queue, but only as many as it can handle at a time.  If it does leave
417  *    requests on the queue, it is responsible for arranging that the requests
418  *    get dealt with eventually.
419  *
420  *    A global spin lock $io_request_lock must be held while manipulating the
421  *    requests on the request queue.
422  *
423  *    The request on the head of the queue is by default assumed to be
424  *    potentially active, and it is not considered for re-ordering or merging
425  *    whenever the given queue is unplugged. This behaviour can be changed with
426  *    blk_queue_headactive().
427  *
428  * Note:
429  *    blk_init_queue() must be paired with a blk_cleanup-queue() call
430  *    when the block device is deactivated (such as at module unload).
431  **/
432 void blk_init_queue(request_queue_t * q, request_fn_proc * rfn)
433 {
434         INIT_LIST_HEAD(&q->queue_head);
435         INIT_LIST_HEAD(&q->request_freelist[READ]);
436         INIT_LIST_HEAD(&q->request_freelist[WRITE]);
437         elevator_init(&q->elevator, ELEVATOR_LINUS);
438         blk_init_free_list(q);
439         q->request_fn           = rfn;
440         q->back_merge_fn        = ll_back_merge_fn;
441         q->front_merge_fn       = ll_front_merge_fn;
442         q->merge_requests_fn    = ll_merge_requests_fn;
443         q->make_request_fn      = __make_request;
444         q->plug_tq.sync         = 0;
445         q->plug_tq.routine      = &generic_unplug_device;
446         q->plug_tq.data         = q;
447         q->plugged              = 0;
448         /*
449          * These booleans describe the queue properties.  We set the
450          * default (and most common) values here.  Other drivers can
451          * use the appropriate functions to alter the queue properties.
452          * as appropriate.
453          */
454         q->plug_device_fn       = generic_plug_device;
455         q->head_active          = 1;
456 }
457 
458 
459 #define blkdev_free_rq(list) list_entry((list)->next, struct request, table);
460 /*
461  * Get a free request. io_request_lock must be held and interrupts
462  * disabled on the way in.
463  */
464 static inline struct request *get_request(request_queue_t *q, int rw)
465 {
466         struct list_head *list = &q->request_freelist[rw];
467         struct request *rq;
468 
469         /*
470          * Reads get preferential treatment and are allowed to steal
471          * from the write free list if necessary.
472          */
473         if (!list_empty(list)) {
474                 rq = blkdev_free_rq(list);
475                 goto got_rq;
476         }
477 
478         /*
479          * if the WRITE list is non-empty, we know that rw is READ
480          * and that the READ list is empty. allow reads to 'steal'
481          * from the WRITE list.
482          */
483         if (!list_empty(&q->request_freelist[WRITE])) {
484                 list = &q->request_freelist[WRITE];
485                 rq = blkdev_free_rq(list);
486                 goto got_rq;
487         }
488 
489         return NULL;
490 
491 got_rq:
492         list_del(&rq->table);
493         rq->free_list = list;
494         rq->rq_status = RQ_ACTIVE;
495         rq->special = NULL;
496         rq->q = q;
497         return rq;
498 }
499 
500 /*
501  * No available requests for this queue, unplug the device.
502  */
503 static struct request *__get_request_wait(request_queue_t *q, int rw)
504 {
505         register struct request *rq;
506         DECLARE_WAITQUEUE(wait, current);
507 
508         add_wait_queue_exclusive(&q->wait_for_request, &wait);
509         for (;;) {
510                 __set_current_state(TASK_UNINTERRUPTIBLE);
511                 spin_lock_irq(&io_request_lock);
512                 rq = get_request(q, rw);
513                 spin_unlock_irq(&io_request_lock);
514                 if (rq)
515                         break;
516                 generic_unplug_device(q);
517                 schedule();
518         }
519         remove_wait_queue(&q->wait_for_request, &wait);
520         current->state = TASK_RUNNING;
521         return rq;
522 }
523 
524 static inline struct request *get_request_wait(request_queue_t *q, int rw)
525 {
526         register struct request *rq;
527 
528         spin_lock_irq(&io_request_lock);
529         rq = get_request(q, rw);
530         spin_unlock_irq(&io_request_lock);
531         if (rq)
532                 return rq;
533         return __get_request_wait(q, rw);
534 }
535 
536 /* RO fail safe mechanism */
537 
538 static long ro_bits[MAX_BLKDEV][8];
539 
540 int is_read_only(kdev_t dev)
541 {
542         int minor,major;
543 
544         major = MAJOR(dev);
545         minor = MINOR(dev);
546         if (major < 0 || major >= MAX_BLKDEV) return 0;
547         return ro_bits[major][minor >> 5] & (1 << (minor & 31));
548 }
549 
550 void set_device_ro(kdev_t dev,int flag)
551 {
552         int minor,major;
553 
554         major = MAJOR(dev);
555         minor = MINOR(dev);
556         if (major < 0 || major >= MAX_BLKDEV) return;
557         if (flag) ro_bits[major][minor >> 5] |= 1 << (minor & 31);
558         else ro_bits[major][minor >> 5] &= ~(1 << (minor & 31));
559 }
560 
561 inline void drive_stat_acct (kdev_t dev, int rw,
562                                 unsigned long nr_sectors, int new_io)
563 {
564         unsigned int major = MAJOR(dev);
565         unsigned int index;
566 
567         index = disk_index(dev);
568         if ((index >= DK_MAX_DISK) || (major >= DK_MAX_MAJOR))
569                 return;
570 
571         kstat.dk_drive[major][index] += new_io;
572         if (rw == READ) {
573                 kstat.dk_drive_rio[major][index] += new_io;
574                 kstat.dk_drive_rblk[major][index] += nr_sectors;
575         } else if (rw == WRITE) {
576                 kstat.dk_drive_wio[major][index] += new_io;
577                 kstat.dk_drive_wblk[major][index] += nr_sectors;
578         } else
579                 printk(KERN_ERR "drive_stat_acct: cmd not R/W?\n");
580 }
581 
582 /*
583  * add-request adds a request to the linked list.
584  * It disables interrupts (acquires the request spinlock) so that it can muck
585  * with the request-lists in peace. Thus it should be called with no spinlocks
586  * held.
587  *
588  * By this point, req->cmd is always either READ/WRITE, never READA,
589  * which is important for drive_stat_acct() above.
590  */
591 
592 static inline void add_request(request_queue_t * q, struct request * req,
593                                struct list_head *head, int lat)
594 {
595         int major;
596 
597         drive_stat_acct(req->rq_dev, req->cmd, req->nr_sectors, 1);
598 
599         /*
600          * let selected elevator insert the request
601          */
602         q->elevator.elevator_fn(req, &q->elevator, &q->queue_head, head, lat);
603 
604         /*
605          * FIXME(eric) I don't understand why there is a need for this
606          * special case code.  It clearly doesn't fit any more with
607          * the new queueing architecture, and it got added in 2.3.10.
608          * I am leaving this in here until I hear back from the COMPAQ
609          * people.
610          */
611         major = MAJOR(req->rq_dev);
612         if (major >= COMPAQ_SMART2_MAJOR+0 && major <= COMPAQ_SMART2_MAJOR+7)
613                 (q->request_fn)(q);
614         if (major >= COMPAQ_CISS_MAJOR+0 && major <= COMPAQ_CISS_MAJOR+7)
615                 (q->request_fn)(q);
616         if (major >= DAC960_MAJOR+0 && major <= DAC960_MAJOR+7)
617                 (q->request_fn)(q);
618 }
619 
620 /*
621  * Must be called with io_request_lock held and interrupts disabled
622  */
623 void inline blkdev_release_request(struct request *req)
624 {
625         req->rq_status = RQ_INACTIVE;
626 
627         /*
628          * Request may not have originated from ll_rw_blk
629          */
630         if (req->free_list) {
631                 list_add(&req->table, req->free_list);
632                 req->free_list = NULL;
633                 wake_up(&req->q->wait_for_request);
634         }
635 }
636 
637 /*
638  * Has to be called with the request spinlock acquired
639  */
640 static void attempt_merge(request_queue_t * q,
641                           struct request *req,
642                           int max_sectors,
643                           int max_segments)
644 {
645         struct request *next;
646   
647         next = blkdev_next_request(req);
648         if (req->sector + req->nr_sectors != next->sector)
649                 return;
650         if (req->cmd != next->cmd
651             || req->rq_dev != next->rq_dev
652             || req->nr_sectors + next->nr_sectors > max_sectors
653             || next->sem)
654                 return;
655         /*
656          * If we are not allowed to merge these requests, then
657          * return.  If we are allowed to merge, then the count
658          * will have been updated to the appropriate number,
659          * and we shouldn't do it here too.
660          */
661         if(!(q->merge_requests_fn)(q, req, next, max_segments))
662                 return;
663 
664         req->bhtail->b_reqnext = next->bh;
665         req->bhtail = next->bhtail;
666         req->nr_sectors = req->hard_nr_sectors += next->hard_nr_sectors;
667         list_del(&next->queue);
668         blkdev_release_request(next);
669 }
670 
671 static inline void attempt_back_merge(request_queue_t * q,
672                                       struct request *req,
673                                       int max_sectors,
674                                       int max_segments)
675 {
676         if (&req->queue == q->queue_head.prev)
677                 return;
678         attempt_merge(q, req, max_sectors, max_segments);
679 }
680 
681 static inline void attempt_front_merge(request_queue_t * q,
682                                        struct list_head * head,
683                                        struct request *req,
684                                        int max_sectors,
685                                        int max_segments)
686 {
687         struct list_head * prev;
688 
689         prev = req->queue.prev;
690         if (head == prev)
691                 return;
692         attempt_merge(q, blkdev_entry_to_request(prev), max_sectors, max_segments);
693 }
694 
695 static int __make_request(request_queue_t * q, int rw,
696                                   struct buffer_head * bh)
697 {
698         unsigned int sector, count;
699         int max_segments = MAX_SEGMENTS;
700         struct request * req = NULL, *freereq = NULL;
701         int rw_ahead, max_sectors, el_ret;
702         struct list_head *head;
703         int latency;
704         elevator_t *elevator = &q->elevator;
705 
706         count = bh->b_size >> 9;
707         sector = bh->b_rsector;
708 
709         rw_ahead = 0;   /* normal case; gets changed below for READA */
710         switch (rw) {
711                 case READA:
712                         rw_ahead = 1;
713                         rw = READ;      /* drop into READ */
714                 case READ:
715                 case WRITE:
716                         break;
717                 default:
718                         BUG();
719                         goto end_io;
720         }
721 
722         /* We'd better have a real physical mapping!
723            Check this bit only if the buffer was dirty and just locked
724            down by us so at this point flushpage will block and
725            won't clear the mapped bit under us. */
726         if (!buffer_mapped(bh))
727                 BUG();
728 
729         /*
730          * Temporary solution - in 2.5 this will be done by the lowlevel
731          * driver. Create a bounce buffer if the buffer data points into
732          * high memory - keep the original buffer otherwise.
733          */
734 #if CONFIG_HIGHMEM
735         bh = create_bounce(rw, bh);
736 #endif
737 
738 /* look for a free request. */
739         /*
740          * Try to coalesce the new request with old requests
741          */
742         max_sectors = get_max_sectors(bh->b_rdev);
743 
744         latency = elevator_request_latency(elevator, rw);
745 
746         /*
747          * Now we acquire the request spinlock, we have to be mega careful
748          * not to schedule or do something nonatomic
749          */
750 again:
751         spin_lock_irq(&io_request_lock);
752 
753         /*
754          * skip first entry, for devices with active queue head
755          */
756         head = &q->queue_head;
757         if (q->head_active && !q->plugged)
758                 head = head->next;
759 
760         if (list_empty(head)) {
761                 q->plug_device_fn(q, bh->b_rdev); /* is atomic */
762                 goto get_rq;
763         }
764 
765         el_ret = elevator->elevator_merge_fn(q, &req, bh, rw,
766                                              &max_sectors, &max_segments);
767         switch (el_ret) {
768 
769                 case ELEVATOR_BACK_MERGE:
770                         if (!q->back_merge_fn(q, req, bh, max_segments))
771                                 break;
772                         req->bhtail->b_reqnext = bh;
773                         req->bhtail = bh;
774                         req->nr_sectors = req->hard_nr_sectors += count;
775                         req->e = elevator;
776                         drive_stat_acct(req->rq_dev, req->cmd, count, 0);
777                         attempt_back_merge(q, req, max_sectors, max_segments);
778                         goto out;
779 
780                 case ELEVATOR_FRONT_MERGE:
781                         if (!q->front_merge_fn(q, req, bh, max_segments))
782                                 break;
783                         bh->b_reqnext = req->bh;
784                         req->bh = bh;
785                         req->buffer = bh->b_data;
786                         req->current_nr_sectors = count;
787                         req->sector = req->hard_sector = sector;
788                         req->nr_sectors = req->hard_nr_sectors += count;
789                         req->e = elevator;
790                         drive_stat_acct(req->rq_dev, req->cmd, count, 0);
791                         attempt_front_merge(q, head, req, max_sectors, max_segments);
792                         goto out;
793                 /*
794                  * elevator says don't/can't merge. get new request
795                  */
796                 case ELEVATOR_NO_MERGE:
797                         break;
798 
799                 default:
800                         printk("elevator returned crap (%d)\n", el_ret);
801                         BUG();
802         }
803                 
804         /*
805          * Grab a free request from the freelist. Read first try their
806          * own queue - if that is empty, we steal from the write list.
807          * Writes must block if the write list is empty, and read aheads
808          * are not crucial.
809          */
810 get_rq:
811         if (freereq) {
812                 req = freereq;
813                 freereq = NULL;
814         } else if ((req = get_request(q, rw)) == NULL) {
815                 spin_unlock_irq(&io_request_lock);
816                 if (rw_ahead)
817                         goto end_io;
818 
819                 freereq = __get_request_wait(q, rw);
820                 goto again;
821         }
822 
823 /* fill up the request-info, and add it to the queue */
824         req->cmd = rw;
825         req->errors = 0;
826         req->hard_sector = req->sector = sector;
827         req->hard_nr_sectors = req->nr_sectors = count;
828         req->current_nr_sectors = count;
829         req->nr_segments = 1; /* Always 1 for a new request. */
830         req->nr_hw_segments = 1; /* Always 1 for a new request. */
831         req->buffer = bh->b_data;
832         req->sem = NULL;
833         req->bh = bh;
834         req->bhtail = bh;
835         req->rq_dev = bh->b_rdev;
836         req->e = elevator;
837         add_request(q, req, head, latency);
838 out:
839         if (!q->plugged)
840                 (q->request_fn)(q);
841         if (freereq)
842                 blkdev_release_request(freereq);
843         spin_unlock_irq(&io_request_lock);
844         return 0;
845 end_io:
846         bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state));
847         return 0;
848 }
849 
850 /**
851  * generic_make_request: hand a buffer head to its device driver for I/O
852  * @rw:  READ, WRITE, or READA - what sort of I/O is desired.
853  * @bh:  The buffer head describing the location in memory and on the device.
854  *
855  * generic_make_request() is used to make I/O requests of block
856  * devices. It is passed a &struct buffer_head and a &rw value.  The
857  * %READ and %WRITE options are (hopefully) obvious in meaning.  The
858  * %READA value means that a read is required, but that the driver is
859  * free to fail the request if, for example, it cannot get needed
860  * resources immediately.
861  *
862  * generic_make_request() does not return any status.  The
863  * success/failure status of the request, along with notification of
864  * completion, is delivered asynchronously through the bh->b_end_io
865  * function described (one day) else where.
866  *
867  * The caller of generic_make_request must make sure that b_page,
868  * b_addr, b_size are set to describe the memory buffer, that b_rdev
869  * and b_rsector are set to describe the device address, and the
870  * b_end_io and optionally b_private are set to describe how
871  * completion notification should be signaled.  BH_Mapped should also
872  * be set (to confirm that b_dev and b_blocknr are valid).
873  *
874  * generic_make_request and the drivers it calls may use b_reqnext,
875  * and may change b_rdev and b_rsector.  So the values of these fields
876  * should NOT be depended on after the call to generic_make_request.
877  * Because of this, the caller should record the device address
878  * information in b_dev and b_blocknr.
879  *
880  * Apart from those fields mentioned above, no other fields, and in
881  * particular, no other flags, are changed by generic_make_request or
882  * any lower level drivers.
883  * */
884 void generic_make_request (int rw, struct buffer_head * bh)
885 {
886         int major = MAJOR(bh->b_rdev);
887         request_queue_t *q;
888 
889         if (!bh->b_end_io) BUG();
890         if (blk_size[major]) {
891                 unsigned long maxsector = (blk_size[major][MINOR(bh->b_rdev)] << 1) + 1;
892                 unsigned int sector, count;
893 
894                 count = bh->b_size >> 9;
895                 sector = bh->b_rsector;
896 
897                 if (maxsector < count || maxsector - count < sector) {
898                         bh->b_state &= (1 << BH_Lock) | (1 << BH_Mapped);
899                         if (blk_size[major][MINOR(bh->b_rdev)]) {
900                                 
901                                 /* This may well happen - the kernel calls bread()
902                                    without checking the size of the device, e.g.,
903                                    when mounting a device. */
904                                 printk(KERN_INFO
905                                        "attempt to access beyond end of device\n");
906                                 printk(KERN_INFO "%s: rw=%d, want=%d, limit=%d\n",
907                                        kdevname(bh->b_rdev), rw,
908                                        (sector + count)>>1,
909                                        blk_size[major][MINOR(bh->b_rdev)]);
910                         }
911                         bh->b_end_io(bh, 0);
912                         return;
913                 }
914         }
915 
916         /*
917          * Resolve the mapping until finished. (drivers are
918          * still free to implement/resolve their own stacking
919          * by explicitly returning 0)
920          */
921         /* NOTE: we don't repeat the blk_size check for each new device.
922          * Stacking drivers are expected to know what they are doing.
923          */
924         do {
925                 q = blk_get_queue(bh->b_rdev);
926                 if (!q) {
927                         printk(KERN_ERR
928                                "generic_make_request: Trying to access nonexistent block-device %s (%ld)\n",
929                                kdevname(bh->b_rdev), bh->b_rsector);
930                         buffer_IO_error(bh);
931                         break;
932                 }
933 
934         }
935         while (q->make_request_fn(q, rw, bh));
936 }
937 
938 
939 /**
940  * submit_bh: submit a buffer_head to the block device later for I/O
941  * @rw: whether to %READ or %WRITE, or mayve to %READA (read ahead)
942  * @bh: The &struct buffer_head which describes the I/O
943  *
944  * submit_bh() is very similar in purpose to generic_make_request(), and
945  * uses that function to do most of the work.
946  *
947  * The extra functionality provided by submit_bh is to determine
948  * b_rsector from b_blocknr and b_size, and to set b_rdev from b_dev.
949  * This is is appropriate for IO requests that come from the buffer
950  * cache and page cache which (currently) always use aligned blocks.
951  */
952 void submit_bh(int rw, struct buffer_head * bh)
953 {
954         if (!test_bit(BH_Lock, &bh->b_state))
955                 BUG();
956 
957         set_bit(BH_Req, &bh->b_state);
958 
959         /*
960          * First step, 'identity mapping' - RAID or LVM might
961          * further remap this.
962          */
963         bh->b_rdev = bh->b_dev;
964         bh->b_rsector = bh->b_blocknr * (bh->b_size>>9);
965 
966         generic_make_request(rw, bh);
967 
968         switch (rw) {
969                 case WRITE:
970                         kstat.pgpgout++;
971                         break;
972                 default:
973                         kstat.pgpgin++;
974                         break;
975         }
976 }
977 
/*
 * Default IO end handler, installed as b_end_io by "ll_rw_block()":
 * record the up-to-date status on the buffer, then unlock it.
 */
static void end_buffer_io_sync(struct buffer_head *bh, int uptodate)
{
	mark_buffer_uptodate(bh, uptodate);

	unlock_buffer(bh);
}
986 
987 /**
988  * ll_rw_block: low-level access to block devices
989  * @rw: whether to %READ or %WRITE or maybe %READA (readahead)
990  * @nr: number of &struct buffer_heads in the array
991  * @bhs: array of pointers to &struct buffer_head
992  *
993  * ll_rw_block() takes an array of pointers to &struct buffer_heads,
994  * and requests an I/O operation on them, either a %READ or a %WRITE.
995  * The third %READA option is described in the documentation for
996  * generic_make_request() which ll_rw_block() calls.
997  *
998  * This function provides extra functionality that is not in
999  * generic_make_request() that is relevant to buffers in the buffer
1000  * cache or page cache.  In particular it drops any buffer that it
1001  * cannot get a lock on (with the BH_Lock state bit), any buffer that
1002  * appears to be clean when doing a write request, and any buffer that
1003  * appears to be up-to-date when doing read request.  Further it marks
1004  * as clean buffers that are processed for writing (the buffer cache
1005  * wont assume that they are actually clean until the buffer gets
1006  * unlocked).
1007  *
1008  * ll_rw_block sets b_end_io to simple completion handler that marks
1009  * the buffer up-to-date (if approriate), unlocks the buffer and wakes
1010  * any waiters.  As client that needs a more interesting completion
1011  * routine should call submit_bh() (or generic_make_request())
1012  * directly.
1013  *
1014  * Caveat:
1015  *  All of the buffers must be for the same device, and must also be
1016  *  of the current approved size for the device.  */
1017 
1018 void ll_rw_block(int rw, int nr, struct buffer_head * bhs[])
1019 {
1020         unsigned int major;
1021         int correct_size;
1022         int i;
1023 
1024         major = MAJOR(bhs[0]->b_dev);
1025 
1026         /* Determine correct block size for this device. */
1027         correct_size = BLOCK_SIZE;
1028         if (blksize_size[major]) {
1029                 i = blksize_size[major][MINOR(bhs[0]->b_dev)];
1030                 if (i)
1031                         correct_size = i;
1032         }
1033 
1034         /* Verify requested block sizes. */
1035         for (i = 0; i < nr; i++) {
1036                 struct buffer_head *bh;
1037                 bh = bhs[i];
1038                 if (bh->b_size != correct_size) {
1039                         printk(KERN_NOTICE "ll_rw_block: device %s: "
1040                                "only %d-char blocks implemented (%u)\n",
1041                                kdevname(bhs[0]->b_dev),
1042                                correct_size, bh->b_size);
1043                         goto sorry;
1044                 }
1045         }
1046 
1047         if ((rw & WRITE) && is_read_only(bhs[0]->b_dev)) {
1048                 printk(KERN_NOTICE "Can't write to read-only device %s\n",
1049                        kdevname(bhs[0]->b_dev));
1050                 goto sorry;
1051         }
1052 
1053         for (i = 0; i < nr; i++) {
1054                 struct buffer_head *bh;
1055                 bh = bhs[i];
1056 
1057                 /* Only one thread can actually submit the I/O. */
1058                 if (test_and_set_bit(BH_Lock, &bh->b_state))
1059                         continue;
1060 
1061                 /* We have the buffer lock */
1062                 bh->b_end_io = end_buffer_io_sync;
1063 
1064                 switch(rw) {
1065                 case WRITE:
1066                         if (!atomic_set_buffer_clean(bh))
1067                                 /* Hmmph! Nothing to write */
1068                                 goto end_io;
1069                         __mark_buffer_clean(bh);
1070                         break;
1071 
1072                 case READA:
1073                 case READ:
1074                         if (buffer_uptodate(bh))
1075                                 /* Hmmph! Already have it */
1076                                 goto end_io;
1077                         break;
1078                 default:
1079                         BUG();
1080         end_io:
1081                         bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state));
1082                         continue;
1083                 }
1084 
1085                 submit_bh(rw, bh);
1086         }
1087         return;
1088 
1089 sorry:
1090         /* Make sure we don't get infinite dirty retries.. */
1091         for (i = 0; i < nr; i++)
1092                 mark_buffer_clean(bhs[i]);
1093 }
1094 
1095 #ifdef CONFIG_STRAM_SWAP
1096 extern int stram_device_init (void);
1097 #endif
1098 
1099 /*
1100  * First step of what used to be end_request
1101  *
1102  * 0 means continue with end_that_request_last,
1103  * 1 means we are done
1104  */
1105 
1106 int end_that_request_first (struct request *req, int uptodate, char *name)
1107 {
1108         struct buffer_head * bh;
1109         int nsect;
1110 
1111         req->errors = 0;
1112         if (!uptodate)
1113                 printk("end_request: I/O error, dev %s (%s), sector %lu\n",
1114                         kdevname(req->rq_dev), name, req->sector);
1115 
1116         if ((bh = req->bh) != NULL) {
1117                 nsect = bh->b_size >> 9;
1118                 req->bh = bh->b_reqnext;
1119                 bh->b_reqnext = NULL;
1120                 bh->b_end_io(bh, uptodate);
1121                 if ((bh = req->bh) != NULL) {
1122                         req->hard_sector += nsect;
1123                         req->hard_nr_sectors -= nsect;
1124                         req->sector = req->hard_sector;
1125                         req->nr_sectors = req->hard_nr_sectors;
1126 
1127                         req->current_nr_sectors = bh->b_size >> 9;
1128                         if (req->nr_sectors < req->current_nr_sectors) {
1129                                 req->nr_sectors = req->current_nr_sectors;
1130                                 printk("end_request: buffer-list destroyed\n");
1131                         }
1132                         req->buffer = bh->b_data;
1133                         return 1;
1134                 }
1135         }
1136         return 0;
1137 }
1138 
1139 void end_that_request_last(struct request *req)
1140 {
1141         if (req->e) {
1142                 printk("end_that_request_last called with non-dequeued req\n");
1143                 BUG();
1144         }
1145         if (req->sem != NULL)
1146                 up(req->sem);
1147 
1148         blkdev_release_request(req);
1149 }
1150 
1151 int __init blk_dev_init(void)
1152 {
1153         struct blk_dev_struct *dev;
1154 
1155         request_cachep = kmem_cache_create("blkdev_requests",
1156                                            sizeof(struct request),
1157                                            0, SLAB_HWCACHE_ALIGN, NULL, NULL);
1158 
1159         if (!request_cachep)
1160                 panic("Can't create request pool slab cache\n");
1161 
1162         for (dev = blk_dev + MAX_BLKDEV; dev-- != blk_dev;)
1163                 dev->queue = NULL;
1164 
1165         memset(ro_bits,0,sizeof(ro_bits));
1166         memset(max_readahead, 0, sizeof(max_readahead));
1167         memset(max_sectors, 0, sizeof(max_sectors));
1168 #ifdef CONFIG_AMIGA_Z2RAM
1169         z2_init();
1170 #endif
1171 #ifdef CONFIG_STRAM_SWAP
1172         stram_device_init();
1173 #endif
1174 #ifdef CONFIG_BLK_DEV_RAM
1175         rd_init();
1176 #endif
1177 #ifdef CONFIG_BLK_DEV_LOOP
1178         loop_init();
1179 #endif
1180 #ifdef CONFIG_ISP16_CDI
1181         isp16_init();
1182 #endif
1183 #if defined(CONFIG_IDE) && defined(CONFIG_BLK_DEV_IDE)
1184         ide_init();             /* this MUST precede hd_init */
1185 #endif
1186 #if defined(CONFIG_IDE) && defined(CONFIG_BLK_DEV_HD)
1187         hd_init();
1188 #endif
1189 #ifdef CONFIG_BLK_DEV_PS2
1190         ps2esdi_init();
1191 #endif
1192 #ifdef CONFIG_BLK_DEV_XD
1193         xd_init();
1194 #endif
1195 #ifdef CONFIG_BLK_DEV_MFM
1196         mfm_init();
1197 #endif
1198 #ifdef CONFIG_PARIDE
1199         { extern void paride_init(void); paride_init(); };
1200 #endif
1201 #ifdef CONFIG_MAC_FLOPPY
1202         swim3_init();
1203 #endif
1204 #ifdef CONFIG_BLK_DEV_SWIM_IOP
1205         swimiop_init();
1206 #endif
1207 #ifdef CONFIG_AMIGA_FLOPPY
1208         amiga_floppy_init();
1209 #endif
1210 #ifdef CONFIG_ATARI_FLOPPY
1211         atari_floppy_init();
1212 #endif
1213 #ifdef CONFIG_BLK_DEV_FD
1214         floppy_init();
1215 #else
1216 #if defined(__i386__)   /* Do we even need this? */
1217         outb_p(0xc, 0x3f2);
1218 #endif
1219 #endif
1220 #ifdef CONFIG_CDU31A
1221         cdu31a_init();
1222 #endif
1223 #ifdef CONFIG_ATARI_ACSI
1224         acsi_init();
1225 #endif
1226 #ifdef CONFIG_MCD
1227         mcd_init();
1228 #endif
1229 #ifdef CONFIG_MCDX
1230         mcdx_init();
1231 #endif
1232 #ifdef CONFIG_SBPCD
1233         sbpcd_init();
1234 #endif
1235 #ifdef CONFIG_AZTCD
1236         aztcd_init();
1237 #endif
1238 #ifdef CONFIG_CDU535
1239         sony535_init();
1240 #endif
1241 #ifdef CONFIG_GSCD
1242         gscd_init();
1243 #endif
1244 #ifdef CONFIG_CM206
1245         cm206_init();
1246 #endif
1247 #ifdef CONFIG_OPTCD
1248         optcd_init();
1249 #endif
1250 #ifdef CONFIG_SJCD
1251         sjcd_init();
1252 #endif
1253 #ifdef CONFIG_APBLOCK
1254         ap_init();
1255 #endif
1256 #ifdef CONFIG_DDV
1257         ddv_init();
1258 #endif
1259 #ifdef CONFIG_BLK_DEV_NBD
1260         nbd_init();
1261 #endif
1262 #ifdef CONFIG_MDISK
1263         mdisk_init();
1264 #endif
1265 #ifdef CONFIG_DASD
1266         dasd_init();
1267 #endif
1268 #ifdef CONFIG_SUN_JSFLASH
1269         jsfd_init();
1270 #endif
1271 #ifdef CONFIG_BLK_DEV_LVM
1272         lvm_init();
1273 #endif
1274         return 0;
1275 };
1276 
/*
 * Block-layer entry points from this file that are published with
 * EXPORT_SYMBOL (presumably for use by modular drivers - confirm
 * against <linux/module.h>).
 */
1277 EXPORT_SYMBOL(io_request_lock);
1278 EXPORT_SYMBOL(end_that_request_first);
1279 EXPORT_SYMBOL(end_that_request_last);
1280 EXPORT_SYMBOL(blk_init_queue);
1281 EXPORT_SYMBOL(blk_get_queue);
1282 EXPORT_SYMBOL(blk_cleanup_queue);
1283 EXPORT_SYMBOL(blk_queue_headactive);
1284 EXPORT_SYMBOL(blk_queue_pluggable);
1285 EXPORT_SYMBOL(blk_queue_make_request);
1286 EXPORT_SYMBOL(generic_make_request);
1287 EXPORT_SYMBOL(blkdev_release_request);
1288 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.