~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

Linux Cross Reference
Linux/fs/buffer.c

Version: ~ [ 2.4.0 ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /*
  2  *  linux/fs/buffer.c
  3  *
  4  *  Copyright (C) 1991, 1992  Linus Torvalds
  5  */
  6 
  7 /*
  8  *  'buffer.c' implements the buffer-cache functions. Race-conditions have
  9  * been avoided by NEVER letting an interrupt change a buffer (except for the
 10  * data, of course), but instead letting the caller do it.
 11  */
 12 
 13 /* Start bdflush() with kernel_thread not syscall - Paul Gortmaker, 12/95 */
 14 
 15 /* Removed a lot of unnecessary code and simplified things now that
 16  * the buffer cache isn't our primary cache - Andrew Tridgell 12/96
 17  */
 18 
 19 /* Speed up hash, lru, and free list operations.  Use gfp() for allocating
 20  * hash table, use SLAB cache for buffer heads. -DaveM
 21  */
 22 
 23 /* Added 32k buffer block sizes - these are required on older ARM systems.
 24  * - RMK
 25  */
 26 
 27 /* Thread it... -DaveM */
 28 
 29 /* async buffer flushing, 1999 Andrea Arcangeli <andrea@suse.de> */
 30 
 31 #include <linux/config.h>
 32 #include <linux/sched.h>
 33 #include <linux/fs.h>
 34 #include <linux/malloc.h>
 35 #include <linux/locks.h>
 36 #include <linux/errno.h>
 37 #include <linux/swap.h>
 38 #include <linux/swapctl.h>
 39 #include <linux/smp_lock.h>
 40 #include <linux/vmalloc.h>
 41 #include <linux/blkdev.h>
 42 #include <linux/sysrq.h>
 43 #include <linux/file.h>
 44 #include <linux/init.h>
 45 #include <linux/quotaops.h>
 46 #include <linux/iobuf.h>
 47 #include <linux/highmem.h>
 48 
 49 #include <asm/uaccess.h>
 50 #include <asm/io.h>
 51 #include <asm/bitops.h>
 52 #include <asm/mmu_context.h>
 53 
 54 #define NR_SIZES 7
 55 static char buffersize_index[65] =
 56 {-1,  0,  1, -1,  2, -1, -1, -1, 3, -1, -1, -1, -1, -1, -1, -1,
 57   4, -1, -1, -1, -1, -1, -1, -1, -1,-1, -1, -1, -1, -1, -1, -1,
 58   5, -1, -1, -1, -1, -1, -1, -1, -1,-1, -1, -1, -1, -1, -1, -1,
 59  -1, -1, -1, -1, -1, -1, -1, -1, -1,-1, -1, -1, -1, -1, -1, -1,
 60   6};
 61 
 62 #define BUFSIZE_INDEX(X) ((int) buffersize_index[(X)>>9])
 63 #define MAX_BUF_PER_PAGE (PAGE_CACHE_SIZE / 512)
 64 #define NR_RESERVED (2*MAX_BUF_PER_PAGE)
 65 #define MAX_UNUSED_BUFFERS NR_RESERVED+20 /* don't ever have more than this 
 66                                              number of unused buffer heads */
 67 
 68 /* Anti-deadlock ordering:
 69  *      lru_list_lock > hash_table_lock > free_list_lock > unused_list_lock
 70  */
 71 
 72 #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_inode_buffers)
 73 
 74 /*
 75  * Hash table gook..
 76  */
 77 static unsigned int bh_hash_mask;
 78 static unsigned int bh_hash_shift;
 79 static struct buffer_head **hash_table;
 80 static rwlock_t hash_table_lock = RW_LOCK_UNLOCKED;
 81 
 82 static struct buffer_head *lru_list[NR_LIST];
 83 static spinlock_t lru_list_lock = SPIN_LOCK_UNLOCKED;
 84 static int nr_buffers_type[NR_LIST];
 85 static unsigned long size_buffers_type[NR_LIST];
 86 
 87 static struct buffer_head * unused_list;
 88 static int nr_unused_buffer_heads;
 89 static spinlock_t unused_list_lock = SPIN_LOCK_UNLOCKED;
 90 static DECLARE_WAIT_QUEUE_HEAD(buffer_wait);
 91 
 92 struct bh_free_head {
 93         struct buffer_head *list;
 94         spinlock_t lock;
 95 };
 96 static struct bh_free_head free_list[NR_SIZES];
 97 
 98 static int grow_buffers(int size);
 99 static void __refile_buffer(struct buffer_head *);
100 
101 /* This is used by some architectures to estimate available memory. */
102 atomic_t buffermem_pages = ATOMIC_INIT(0);
103 
/*
 * Parameter block for the bdflush process.  If you add or remove any of
 * the parameters, make sure to update kernel/sysctl.c.
 */
#define N_PARAM 9

/*
 * The same storage can be reached by member name (b_un) or by index (data).
 * The dummy members are left in for compatibility with old programs that
 * play with the /proc entries.
 */
union bdflush_param {
        struct {
                /* Percentage of buffer cache dirty to activate bdflush */
                int nfract;
                /* Maximum number of dirty blocks to write out per wake-cycle */
                int ndirty;
                /* Number of clean buffers to try to obtain each time we call refill */
                int nrefill;
                /* unused */
                int dummy1;
                /* jiffies delay between kupdate flushes */
                int interval;
                /* Time for normal buffer to age before we flush it */
                int age_buffer;
                /* Percentage of buffer cache dirty to activate bdflush synchronously */
                int nfract_sync;
                /* unused */
                int dummy2;
                /* unused */
                int dummy3;
        } b_un;
        unsigned int data[N_PARAM];
} bdf_prm = {{
        30,             /* nfract */
        64,             /* ndirty */
        64,             /* nrefill */
        256,            /* dummy1 */
        5*HZ,           /* interval */
        30*HZ,          /* age_buffer */
        60,             /* nfract_sync */
        0,              /* dummy2 */
        0               /* dummy3 */
}};

/*
 * These are the min and max parameter values that we will allow to be
 * assigned, in the same order as the members above.
 */
int bdflush_min[N_PARAM] = {
        0,              /* nfract */
        10,             /* ndirty */
        5,              /* nrefill */
        25,             /* dummy1 */
        0,              /* interval */
        1*HZ,           /* age_buffer */
        0,              /* nfract_sync */
        0,              /* dummy2 */
        0               /* dummy3 */
};
int bdflush_max[N_PARAM] = {
        100,            /* nfract */
        50000,          /* ndirty */
        20000,          /* nrefill */
        20000,          /* dummy1 */
        600*HZ,         /* interval */
        6000*HZ,        /* age_buffer */
        100,            /* nfract_sync */
        0,              /* dummy2 */
        0               /* dummy3 */
};
135 
136 /*
137  * Rewrote the wait-routines to use the "new" wait-queue functionality,
138  * and getting rid of the cli-sti pairs. The wait-queue routines still
139  * need cli-sti, but now it's just a couple of 386 instructions or so.
140  *
141  * Note that the real wait_on_buffer() is an inline function that checks
142  * if 'b_wait' is set before calling this, so that the queues aren't set
143  * up unnecessarily.
144  */
145 void __wait_on_buffer(struct buffer_head * bh)
146 {
147         struct task_struct *tsk = current;
148         DECLARE_WAITQUEUE(wait, tsk);
149 
150         atomic_inc(&bh->b_count);
151         add_wait_queue(&bh->b_wait, &wait);
152         do {
153                 run_task_queue(&tq_disk);
154                 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
155                 if (!buffer_locked(bh))
156                         break;
157                 schedule();
158         } while (buffer_locked(bh));
159         tsk->state = TASK_RUNNING;
160         remove_wait_queue(&bh->b_wait, &wait);
161         atomic_dec(&bh->b_count);
162 }
163 
164 /* Call sync_buffers with wait!=0 to ensure that the call does not
165  * return until all buffer writes have completed.  Sync() may return
166  * before the writes have finished; fsync() may not.
167  */
168 
169 /* Godamity-damn.  Some buffers (bitmaps for filesystems)
170  * spontaneously dirty themselves without ever brelse being called.
171  * We will ultimately want to put these in a separate list, but for
172  * now we search all of the lists for dirty buffers.
173  */
174 static int sync_buffers(kdev_t dev, int wait)
175 {
            /*
             * Walk lru_list[BUF_DIRTY] and then lru_list[BUF_LOCKED] under
             * lru_list_lock, submitting dirty buffers with ll_rw_block(WRITE)
             * and, when 'wait' is non-zero and pass > 0, sleeping on locked
             * ones.  A 'dev' of 0 matches buffers of every device.
             *
             * Returns 0, or -EIO if (with 'wait' set) a buffer was found that
             * has been requested, is unlocked and clean, yet is not uptodate.
             */
176         int i, retry, pass = 0, err = 0;
177         struct buffer_head * bh, *next;
178 
179         /* One pass for no-wait, three for wait:
180          * 0) write out all dirty, unlocked buffers;
181          * 1) write out all dirty buffers, waiting if locked;
182          * 2) wait for completion by waiting for all buffers to unlock.
183          */
184         do {
185                 retry = 0;
186 
187                 /* We search all lists as a failsafe mechanism, not because we expect
188                  * there to be dirty buffers on any of the other lists.
189                  */
190 repeat:
191                 spin_lock(&lru_list_lock);
192                 bh = lru_list[BUF_DIRTY];
193                 if (!bh)
194                         goto repeat2;
195 
                    /* The scan is capped at twice the current length of the list. */
196                 for (i = nr_buffers_type[BUF_DIRTY]*2 ; i-- > 0 ; bh = next) {
197                         next = bh->b_next_free;
198 
199                         if (!lru_list[BUF_DIRTY])
200                                 break;
201                         if (dev && bh->b_dev != dev)
202                                 continue;
203                         if (buffer_locked(bh)) {
204                                 /* Buffer is locked; skip it unless wait is
205                                  * requested AND pass > 0.
206                                  */
207                                 if (!wait || !pass) {
208                                         retry = 1;
209                                         continue;
210                                 }
                                    /* Take a reference before dropping the list
                                     * lock; 'repeat' retakes the lock, so it is
                                     * not re-acquired here.
                                     */
211                                 atomic_inc(&bh->b_count);
212                                 spin_unlock(&lru_list_lock);
213                                 wait_on_buffer (bh);
214                                 atomic_dec(&bh->b_count);
215                                 goto repeat;
216                         }
217 
218                         /* If an unlocked buffer is not uptodate, there has
219                          * been an IO error. Skip it.
220                          */
221                         if (wait && buffer_req(bh) && !buffer_locked(bh) &&
222                             !buffer_dirty(bh) && !buffer_uptodate(bh)) {
223                                 err = -EIO;
224                                 continue;
225                         }
226 
227                         /* Don't write clean buffers.  Don't write ANY buffers
228                          * on the third pass.
229                          */
230                         if (!buffer_dirty(bh) || pass >= 2)
231                                 continue;
232 
                            /* Submit the write with the list lock dropped, then
                             * rescan the dirty list from its head.
                             */
233                         atomic_inc(&bh->b_count);
234                         spin_unlock(&lru_list_lock);
235                         ll_rw_block(WRITE, 1, &bh);
236                         atomic_dec(&bh->b_count);
237                         retry = 1;
238                         goto repeat;
239                 }
240 
241     repeat2:
                    /* lru_list_lock is held on every path that reaches here. */
242                 bh = lru_list[BUF_LOCKED];
243                 if (!bh) {
244                         spin_unlock(&lru_list_lock);
                            /* NOTE(review): this break leaves the enclosing
                             * do/while, so no further pass runs when the
                             * BUF_LOCKED list is empty, whatever 'retry' says.
                             */
245                         break;
246                 }
247                 for (i = nr_buffers_type[BUF_LOCKED]*2 ; i-- > 0 ; bh = next) {
248                         next = bh->b_next_free;
249 
250                         if (!lru_list[BUF_LOCKED])
251                                 break;
252                         if (dev && bh->b_dev != dev)
253                                 continue;
254                         if (buffer_locked(bh)) {
255                                 /* Buffer is locked; skip it unless wait is
256                                  * requested AND pass > 0.
257                                  */
258                                 if (!wait || !pass) {
259                                         retry = 1;
260                                         continue;
261                                 }
                                    /* 'repeat2' sits below the spin_lock() call,
                                     * so the lock is retaken by hand before
                                     * jumping back.
                                     */
262                                 atomic_inc(&bh->b_count);
263                                 spin_unlock(&lru_list_lock);
264                                 wait_on_buffer (bh);
265                                 spin_lock(&lru_list_lock);
266                                 atomic_dec(&bh->b_count);
267                                 goto repeat2;
268                         }
269                 }
270                 spin_unlock(&lru_list_lock);
271 
272                 /* If we are waiting for the sync to succeed, and if any dirty
273                  * blocks were written, then repeat; on the second pass, only
274                  * wait for buffers being written (do not pass to write any
275                  * more buffers on the second pass).
276                  */
277         } while (wait && retry && ++pass<=2);
278         return err;
279 }
280 
281 void sync_dev(kdev_t dev)
282 {
283         sync_supers(dev);
284         sync_inodes(dev);
285         DQUOT_SYNC(dev);
286         /* sync all the dirty buffers out to disk only _after_ all the
287            high level layers finished generated buffer dirty data
288            (or we'll return with some buffer still dirty on the blockdevice
289            so breaking the semantics of this call) */
290         sync_buffers(dev, 0);
291         /*
292          * FIXME(eric) we need to sync the physical devices here.
293          * This is because some (scsi) controllers have huge amounts of
294          * cache onboard (hundreds of Mb), and we need to instruct
295          * them to commit all of the dirty memory to disk, and we should
296          * not return until this has happened.
297          *
298          * This would need to get implemented by going through the assorted
299          * layers so that each block major number can be synced, and this
300          * would call down into the upper and mid-layer scsi.
301          */
302 }
303 
304 int fsync_dev(kdev_t dev)
305 {
306         sync_buffers(dev, 0);
307 
308         lock_kernel();
309         sync_supers(dev);
310         sync_inodes(dev);
311         DQUOT_SYNC(dev);
312         unlock_kernel();
313 
314         return sync_buffers(dev, 1);
315 }
316 
317 asmlinkage long sys_sync(void)
318 {
319         fsync_dev(0);
320         return 0;
321 }
322 
323 /*
324  *      filp may be NULL if called via the msync of a vma.
325  */
326  
327 int file_fsync(struct file *filp, struct dentry *dentry, int datasync)
328 {
329         struct inode * inode = dentry->d_inode;
330         struct super_block * sb;
331         kdev_t dev;
332         int ret;
333 
334         lock_kernel();
335         /* sync the inode to buffers */
336         write_inode_now(inode, 0);
337 
338         /* sync the superblock to buffers */
339         sb = inode->i_sb;
340         lock_super(sb);
341         if (sb->s_op && sb->s_op->write_super)
342                 sb->s_op->write_super(sb);
343         unlock_super(sb);
344 
345         /* .. finally sync the buffers to disk */
346         dev = inode->i_dev;
347         ret = sync_buffers(dev, 1);
348         unlock_kernel();
349         return ret;
350 }
351 
352 asmlinkage long sys_fsync(unsigned int fd)
353 {
354         struct file * file;
355         struct dentry * dentry;
356         struct inode * inode;
357         int err;
358 
359         err = -EBADF;
360         file = fget(fd);
361         if (!file)
362                 goto out;
363 
364         dentry = file->f_dentry;
365         inode = dentry->d_inode;
366 
367         err = -EINVAL;
368         if (!file->f_op || !file->f_op->fsync)
369                 goto out_putf;
370 
371         /* We need to protect against concurrent writers.. */
372         down(&inode->i_sem);
373         filemap_fdatasync(inode->i_mapping);
374         err = file->f_op->fsync(file, dentry, 0);
375         filemap_fdatawait(inode->i_mapping);
376         up(&inode->i_sem);
377 
378 out_putf:
379         fput(file);
380 out:
381         return err;
382 }
383 
384 asmlinkage long sys_fdatasync(unsigned int fd)
385 {
386         struct file * file;
387         struct dentry * dentry;
388         struct inode * inode;
389         int err;
390 
391         err = -EBADF;
392         file = fget(fd);
393         if (!file)
394                 goto out;
395 
396         dentry = file->f_dentry;
397         inode = dentry->d_inode;
398 
399         err = -EINVAL;
400         if (!file->f_op || !file->f_op->fsync)
401                 goto out_putf;
402 
403         down(&inode->i_sem);
404         filemap_fdatasync(inode->i_mapping);
405         err = file->f_op->fsync(file, dentry, 1);
406         filemap_fdatawait(inode->i_mapping);
407         up(&inode->i_sem);
408 
409 out_putf:
410         fput(file);
411 out:
412         return err;
413 }
414 
415 /* After several hours of tedious analysis, the following hash
416  * function won.  Do not mess with it... -DaveM
417  */
/*
 * _hashfn() XORs shifted copies of 'dev' and 'block'; the shift amounts
 * are relative to bh_hash_shift.  hash() masks that value with
 * bh_hash_mask and names the matching hash_table slot, so it can be read
 * from, assigned to, or have its address taken.
 */
#define _hashfn(dev,block) \
        ((((dev)<<(bh_hash_shift - 6)) ^ \
          ((dev)<<(bh_hash_shift - 9))) ^ \
         (((block)<<(bh_hash_shift - 6)) ^ \
          ((block) >> 13) ^ \
          ((block) << (bh_hash_shift - 12))))
#define hash(dev,block) \
        hash_table[(_hashfn(HASHDEV(dev),block) & bh_hash_mask)]
423 
424 static __inline__ void __hash_link(struct buffer_head *bh, struct buffer_head **head)
425 {
426         if ((bh->b_next = *head) != NULL)
427                 bh->b_next->b_pprev = &bh->b_next;
428         *head = bh;
429         bh->b_pprev = head;
430 }
431 
432 static __inline__ void __hash_unlink(struct buffer_head *bh)
433 {
434         if (bh->b_pprev) {
435                 if (bh->b_next)
436                         bh->b_next->b_pprev = bh->b_pprev;
437                 *(bh->b_pprev) = bh->b_next;
438                 bh->b_pprev = NULL;
439         }
440 }
441 
442 static void __insert_into_lru_list(struct buffer_head * bh, int blist)
443 {
444         struct buffer_head **bhp = &lru_list[blist];
445 
446         if(!*bhp) {
447                 *bhp = bh;
448                 bh->b_prev_free = bh;
449         }
450         bh->b_next_free = *bhp;
451         bh->b_prev_free = (*bhp)->b_prev_free;
452         (*bhp)->b_prev_free->b_next_free = bh;
453         (*bhp)->b_prev_free = bh;
454         nr_buffers_type[blist]++;
455         size_buffers_type[blist] += bh->b_size;
456 }
457 
458 static void __remove_from_lru_list(struct buffer_head * bh, int blist)
459 {
460         if (bh->b_prev_free || bh->b_next_free) {
461                 bh->b_prev_free->b_next_free = bh->b_next_free;
462                 bh->b_next_free->b_prev_free = bh->b_prev_free;
463                 if (lru_list[blist] == bh)
464                         lru_list[blist] = bh->b_next_free;
465                 if (lru_list[blist] == bh)
466                         lru_list[blist] = NULL;
467                 bh->b_next_free = bh->b_prev_free = NULL;
468                 nr_buffers_type[blist]--;
469                 size_buffers_type[blist] -= bh->b_size;
470         }
471 }
472 
473 static void __remove_from_free_list(struct buffer_head * bh, int index)
474 {
475         if(bh->b_next_free == bh)
476                  free_list[index].list = NULL;
477         else {
478                 bh->b_prev_free->b_next_free = bh->b_next_free;
479                 bh->b_next_free->b_prev_free = bh->b_prev_free;
480                 if (free_list[index].list == bh)
481                          free_list[index].list = bh->b_next_free;
482         }
483         bh->b_next_free = bh->b_prev_free = NULL;
484 }
485 
486 /* must be called with both the hash_table_lock and the lru_list_lock
487    held */
488 static void __remove_from_queues(struct buffer_head *bh)
489 {
490         __hash_unlink(bh);
491         __remove_from_lru_list(bh, bh->b_list);
492 }
493 
494 static void __insert_into_queues(struct buffer_head *bh)
495 {
496         struct buffer_head **head = &hash(bh->b_dev, bh->b_blocknr);
497 
498         __hash_link(bh, head);
499         __insert_into_lru_list(bh, bh->b_list);
500 }
501 
502 /* This function must only run if there are no other
503  * references _anywhere_ to this buffer head.
504  */
505 static void put_last_free(struct buffer_head * bh)
506 {
507         struct bh_free_head *head = &free_list[BUFSIZE_INDEX(bh->b_size)];
508         struct buffer_head **bhp = &head->list;
509 
510         bh->b_state = 0;
511 
512         spin_lock(&head->lock);
513         bh->b_dev = B_FREE;
514         if(!*bhp) {
515                 *bhp = bh;
516                 bh->b_prev_free = bh;
517         }
518         bh->b_next_free = *bhp;
519         bh->b_prev_free = (*bhp)->b_prev_free;
520         (*bhp)->b_prev_free->b_next_free = bh;
521         (*bhp)->b_prev_free = bh;
522         spin_unlock(&head->lock);
523 }
524 
525 /*
526  * Why like this, I hear you say... The reason is race-conditions.
527  * As we don't lock buffers (unless we are reading them, that is),
528  * something might happen to it while we sleep (ie a read-error
529  * will force it bad). This shouldn't really happen currently, but
530  * the code is ready.
531  */
532 static inline struct buffer_head * __get_hash_table(kdev_t dev, int block, int size)
533 {
534         struct buffer_head *bh = hash(dev, block);
535 
536         for (; bh; bh = bh->b_next)
537                 if (bh->b_blocknr == block      &&
538                     bh->b_size    == size       &&
539                     bh->b_dev     == dev)
540                         break;
541         if (bh)
542                 atomic_inc(&bh->b_count);
543 
544         return bh;
545 }
546 
547 struct buffer_head * get_hash_table(kdev_t dev, int block, int size)
548 {
549         struct buffer_head *bh;
550 
551         read_lock(&hash_table_lock);
552         bh = __get_hash_table(dev, block, size);
553         read_unlock(&hash_table_lock);
554 
555         return bh;
556 }
557 
558 unsigned int get_hardblocksize(kdev_t dev)
559 {
560         /*
561          * Get the hard sector size for the given device.  If we don't know
562          * what it is, return 0.
563          */
564         if (hardsect_size[MAJOR(dev)] != NULL) {
565                 int blksize = hardsect_size[MAJOR(dev)][MINOR(dev)];
566                 if (blksize != 0)
567                         return blksize;
568         }
569 
570         /*
571          * We don't know what the hardware sector size for this device is.
572          * Return 0 indicating that we don't know.
573          */
574         return 0;
575 }
576 
577 void buffer_insert_inode_queue(struct buffer_head *bh, struct inode *inode)
578 {
579         spin_lock(&lru_list_lock);
580         if (bh->b_inode)
581                 list_del(&bh->b_inode_buffers);
582         bh->b_inode = inode;
583         list_add(&bh->b_inode_buffers, &inode->i_dirty_buffers);
584         spin_unlock(&lru_list_lock);
585 }
586 
587 /* The caller must have the lru_list lock before calling the 
588    remove_inode_queue functions.  */
589 static void __remove_inode_queue(struct buffer_head *bh)
590 {
591         bh->b_inode = NULL;
592         list_del(&bh->b_inode_buffers);
593 }
594 
595 static inline void remove_inode_queue(struct buffer_head *bh)
596 {
597         if (bh->b_inode)
598                 __remove_inode_queue(bh);
599 }
600 
601 int inode_has_buffers(struct inode *inode)
602 {
603         int ret;
604         
605         spin_lock(&lru_list_lock);
606         ret = !list_empty(&inode->i_dirty_buffers);
607         spin_unlock(&lru_list_lock);
608         
609         return ret;
610 }
611 
612 
613 /* If invalidate_buffers() will trash dirty buffers, it means some kind
614    of fs corruption is going on. Trashing dirty data always imply losing
615    information that was supposed to be just stored on the physical layer
616    by the user.
617 
618    Thus invalidate_buffers in general usage is not allowed to trash dirty
619    buffers. For example ioctl(FLSBLKBUF) expects dirty data to be preserved.
620 
621    NOTE: In the case where the user removed a removable-media-disk even if
622    there's still dirty data not synced on disk (due a bug in the device driver
623    or due an error of the user), by not destroying the dirty buffers we could
624    generate corruption also on the next media inserted, thus a parameter is
625    necessary to handle this case in the most safe way possible (trying
626    to not corrupt also the new disk inserted with the data belonging to
627    the old now corrupted disk). Also for the ramdisk the natural thing
628    to do in order to release the ramdisk memory is to destroy dirty buffers.
629 
630    These are two special cases. Normal usage implies that the device driver
631    issues a sync on the device (without waiting for I/O completion) and
632    then an invalidate_buffers call that doesn't trash dirty buffers. */
633 void __invalidate_buffers(kdev_t dev, int destroy_dirty_buffers)
634 {
635         int i, nlist, slept;
636         struct buffer_head * bh, * bh_next;
637 
638  retry:
639         slept = 0;
640         spin_lock(&lru_list_lock);
641         for(nlist = 0; nlist < NR_LIST; nlist++) {
642                 bh = lru_list[nlist];
643                 if (!bh)
644                         continue;
645                 for (i = nr_buffers_type[nlist]; i > 0 ; bh = bh_next, i--) {
646                         bh_next = bh->b_next_free;
647 
648                         /* Another device? */
649                         if (bh->b_dev != dev)
650                                 continue;
651                         /* Part of a mapping? */
652                         if (bh->b_page->mapping)
653                                 continue;
654                         if (buffer_locked(bh)) {
655                                 atomic_inc(&bh->b_count);
656                                 spin_unlock(&lru_list_lock);
657                                 wait_on_buffer(bh);
658                                 slept = 1;
659                                 spin_lock(&lru_list_lock);
660                                 atomic_dec(&bh->b_count);
661                         }
662 
663                         write_lock(&hash_table_lock);
664                         if (!atomic_read(&bh->b_count) &&
665                             (destroy_dirty_buffers || !buffer_dirty(bh))) {
666                                 remove_inode_queue(bh);
667                                 __remove_from_queues(bh);
668                                 put_last_free(bh);
669                         }
670                         /* else complain loudly? */
671 
672                         write_unlock(&hash_table_lock);
673                         if (slept)
674                                 goto out;
675                 }
676         }
677 out:
678         spin_unlock(&lru_list_lock);
679         if (slept)
680                 goto retry;
681 }
682 
683 void set_blocksize(kdev_t dev, int size)
684 {
685         extern int *blksize_size[];
686         int i, nlist, slept;
687         struct buffer_head * bh, * bh_next;
688 
689         if (!blksize_size[MAJOR(dev)])
690                 return;
691 
692         /* Size must be a power of two, and between 512 and PAGE_SIZE */
693         if (size > PAGE_SIZE || size < 512 || (size & (size-1)))
694                 panic("Invalid blocksize passed to set_blocksize");
695 
696         if (blksize_size[MAJOR(dev)][MINOR(dev)] == 0 && size == BLOCK_SIZE) {
697                 blksize_size[MAJOR(dev)][MINOR(dev)] = size;
698                 return;
699         }
700         if (blksize_size[MAJOR(dev)][MINOR(dev)] == size)
701                 return;
702         sync_buffers(dev, 2);
703         blksize_size[MAJOR(dev)][MINOR(dev)] = size;
704 
705  retry:
706         slept = 0;
707         spin_lock(&lru_list_lock);
708         for(nlist = 0; nlist < NR_LIST; nlist++) {
709                 bh = lru_list[nlist];
710                 if (!bh)
711                         continue;
712                 for (i = nr_buffers_type[nlist]; i > 0 ; bh = bh_next, i--) {
713                         bh_next = bh->b_next_free;
714                         if (bh->b_dev != dev || bh->b_size == size)
715                                 continue;
716                         if (buffer_locked(bh)) {
717                                 atomic_inc(&bh->b_count);
718                                 spin_unlock(&lru_list_lock);
719                                 wait_on_buffer(bh);
720                                 slept = 1;
721                                 spin_lock(&lru_list_lock);
722                                 atomic_dec(&bh->b_count);
723                         }
724 
725                         write_lock(&hash_table_lock);
726                         if (!atomic_read(&bh->b_count)) {
727                                 if (buffer_dirty(bh))
728                                         printk(KERN_WARNING
729                                                "set_blocksize: dev %s buffer_dirty %lu size %hu\n",
730                                                kdevname(dev), bh->b_blocknr, bh->b_size);
731                                 remove_inode_queue(bh);
732                                 __remove_from_queues(bh);
733                                 put_last_free(bh);
734                         } else {
735                                 if (atomic_set_buffer_clean(bh))
736                                         __refile_buffer(bh);
737                                 clear_bit(BH_Uptodate, &bh->b_state);
738                                 printk(KERN_WARNING
739                                        "set_blocksize: "
740                                        "b_count %d, dev %s, block %lu, from %p\n",
741                                        atomic_read(&bh->b_count), bdevname(bh->b_dev),
742                                        bh->b_blocknr, __builtin_return_address(0));
743                         }
744                         write_unlock(&hash_table_lock);
745                         if (slept)
746                                 goto out;
747                 }
748         }
749  out:
750         spin_unlock(&lru_list_lock);
751         if (slept)
752                 goto retry;
753 }
754 
755 /*
756  * We used to try various strange things. Let's not.
757  * We'll just try to balance dirty buffers, and possibly
758  * launder some pages.
759  */
760 static void refill_freelist(int size)
761 {
762         balance_dirty(NODEV);
763         if (free_shortage())
764                 page_launder(GFP_BUFFER, 0);
765         grow_buffers(size);
766 }
767 
768 void init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
769 {
770         bh->b_list = BUF_CLEAN;
771         bh->b_end_io = handler;
772         bh->b_private = private;
773 }
774 
775 static void end_buffer_io_async(struct buffer_head * bh, int uptodate)
776 {
777         static spinlock_t page_uptodate_lock = SPIN_LOCK_UNLOCKED;
778         unsigned long flags;
779         struct buffer_head *tmp;
780         struct page *page;
781 
782         mark_buffer_uptodate(bh, uptodate);
783 
784         /* This is a temporary buffer used for page I/O. */
785         page = bh->b_page;
786 
787         if (!uptodate)
788                 SetPageError(page);
789 
790         /*
791          * Be _very_ careful from here on. Bad things can happen if
792          * two buffer heads end IO at almost the same time and both
793          * decide that the page is now completely done.
794          *
795          * Async buffer_heads are here only as labels for IO, and get
796          * thrown away once the IO for this page is complete.  IO is
797          * deemed complete once all buffers have been visited
798          * (b_count==0) and are now unlocked. We must make sure that
799          * only the _last_ buffer that decrements its count is the one
800          * that unlock the page..
801          */
802         spin_lock_irqsave(&page_uptodate_lock, flags);
803         unlock_buffer(bh);
804         atomic_dec(&bh->b_count);
805         tmp = bh->b_this_page;
806         while (tmp != bh) {
807                 if (tmp->b_end_io == end_buffer_io_async && buffer_locked(tmp))
808                         goto still_busy;
809                 tmp = tmp->b_this_page;
810         }
811 
812         /* OK, the async IO on this page is complete. */
813         spin_unlock_irqrestore(&page_uptodate_lock, flags);
814 
815         /*
816          * if none of the buffers had errors then we can set the
817          * page uptodate:
818          */
819         if (!PageError(page))
820                 SetPageUptodate(page);
821 
822         /*
823          * Run the hooks that have to be done when a page I/O has completed.
824          */
825         if (PageTestandClearDecrAfter(page))
826                 atomic_dec(&nr_async_pages);
827 
828         UnlockPage(page);
829 
830         return;
831 
832 still_busy:
833         spin_unlock_irqrestore(&page_uptodate_lock, flags);
834         return;
835 }
836 
837 /*
838  * Synchronise all the inode's dirty buffers to the disk.
839  *
840  * We have conflicting pressures: we want to make sure that all
841  * initially dirty buffers get waited on, but that any subsequently
842  * dirtied buffers don't.  After all, we don't want fsync to last
843  * forever if somebody is actively writing to the file.
844  *
845  * Do this in two main stages: first we copy dirty buffers to a
846  * temporary inode list, queueing the writes as we go.  Then we clean
847  * up, waiting for those writes to complete.
848  * 
849  * During this second stage, any subsequent updates to the file may end
850  * up refiling the buffer on the original inode's dirty list again, so
851  * there is a chance we will end up with a buffer queued for write but
852  * not yet completed on that list.  So, as a final cleanup we go through
853  * the osync code to catch these locked, dirty buffers without requeuing
854  * any newly dirty buffers for write.
855  */
856 
857 int fsync_inode_buffers(struct inode *inode)
858 {
859         struct buffer_head *bh;
860         struct inode tmp;
861         int err = 0, err2;
862         
863         INIT_LIST_HEAD(&tmp.i_dirty_buffers);
864         
865         spin_lock(&lru_list_lock);
866 
867         while (!list_empty(&inode->i_dirty_buffers)) {
868                 bh = BH_ENTRY(inode->i_dirty_buffers.next);
869                 list_del(&bh->b_inode_buffers);
870                 if (!buffer_dirty(bh) && !buffer_locked(bh))
871                         bh->b_inode = NULL;
872                 else {
873                         bh->b_inode = &tmp;
874                         list_add(&bh->b_inode_buffers, &tmp.i_dirty_buffers);
875                         if (buffer_dirty(bh)) {
876                                 atomic_inc(&bh->b_count);
877                                 spin_unlock(&lru_list_lock);
878                                 ll_rw_block(WRITE, 1, &bh);
879                                 brelse(bh);
880                                 spin_lock(&lru_list_lock);
881                         }
882                 }
883         }
884 
885         while (!list_empty(&tmp.i_dirty_buffers)) {
886                 bh = BH_ENTRY(tmp.i_dirty_buffers.prev);
887                 remove_inode_queue(bh);
888                 atomic_inc(&bh->b_count);
889                 spin_unlock(&lru_list_lock);
890                 wait_on_buffer(bh);
891                 if (!buffer_uptodate(bh))
892                         err = -EIO;
893                 brelse(bh);
894                 spin_lock(&lru_list_lock);
895         }
896         
897         spin_unlock(&lru_list_lock);
898         err2 = osync_inode_buffers(inode);
899 
900         if (err)
901                 return err;
902         else
903                 return err2;
904 }
905 
906 
907 /*
908  * osync is designed to support O_SYNC io.  It waits synchronously for
909  * all already-submitted IO to complete, but does not queue any new
910  * writes to the disk.
911  *
912  * To do O_SYNC writes, just queue the buffer writes with ll_rw_block as
913  * you dirty the buffers, and then use osync_inode_buffers to wait for
914  * completion.  Any other dirty buffers which are not yet queued for
915  * write will not be flushed to disk by the osync.
916  */
917 
918 int osync_inode_buffers(struct inode *inode)
919 {
920         struct buffer_head *bh;
921         struct list_head *list;
922         int err = 0;
923 
924         spin_lock(&lru_list_lock);
925         
926  repeat:
927         
928         for (list = inode->i_dirty_buffers.prev; 
929              bh = BH_ENTRY(list), list != &inode->i_dirty_buffers;
930              list = bh->b_inode_buffers.prev) {
931                 if (buffer_locked(bh)) {
932                         atomic_inc(&bh->b_count);
933                         spin_unlock(&lru_list_lock);
934                         wait_on_buffer(bh);
935                         if (!buffer_uptodate(bh))
936                                 err = -EIO;
937                         brelse(bh);
938                         spin_lock(&lru_list_lock);
939                         goto repeat;
940                 }
941         }
942 
943         spin_unlock(&lru_list_lock);
944         return err;
945 }
946 
947 
948 /*
949  * Invalidate any and all dirty buffers on a given inode.  We are
950  * probably unmounting the fs, but that doesn't mean we have already
951  * done a sync().  Just drop the buffers from the inode list.
952  */
953 void invalidate_inode_buffers(struct inode *inode)
954 {
955         struct list_head *list, *next;
956         
957         spin_lock(&lru_list_lock);
958         list = inode->i_dirty_buffers.next; 
959         while (list != &inode->i_dirty_buffers) {
960                 next = list->next;
961                 remove_inode_queue(BH_ENTRY(list));
962                 list = next;
963         }
964         spin_unlock(&lru_list_lock);
965 }
966 
967 
968 /*
969  * Ok, this is getblk, and it isn't very clear, again to hinder
970  * race-conditions. Most of the code is seldom used, (ie repeating),
971  * so it should be much more efficient than it looks.
972  *
973  * The algorithm is changed: hopefully better, and an elusive bug removed.
974  *
975  * 14.02.92: changed it to sync dirty buffers a bit: better performance
976  * when the filesystem starts to get full of dirty blocks (I hope).
977  */
978 struct buffer_head * getblk(kdev_t dev, int block, int size)
979 {
980         struct buffer_head * bh;
981         int isize;
982 
983 repeat:
984         spin_lock(&lru_list_lock);
985         write_lock(&hash_table_lock);
986         bh = __get_hash_table(dev, block, size);
987         if (bh)
988                 goto out;
989 
990         isize = BUFSIZE_INDEX(size);
991         spin_lock(&free_list[isize].lock);
992         bh = free_list[isize].list;
993         if (bh) {
994                 __remove_from_free_list(bh, isize);
995                 atomic_set(&bh->b_count, 1);
996         }
997         spin_unlock(&free_list[isize].lock);
998 
999         /*
1000          * OK, FINALLY we know that this buffer is the only one of
1001          * its kind, we hold a reference (b_count>0), it is unlocked,
1002          * and it is clean.
1003          */
1004         if (bh) {
1005                 init_buffer(bh, NULL, NULL);
1006                 bh->b_dev = dev;
1007                 bh->b_blocknr = block;
1008                 bh->b_state = 1 << BH_Mapped;
1009 
1010                 /* Insert the buffer into the regular lists */
1011                 __insert_into_queues(bh);
1012         out:
1013                 write_unlock(&hash_table_lock);
1014                 spin_unlock(&lru_list_lock);
1015                 touch_buffer(bh);
1016                 return bh;
1017         }
1018 
1019         /*
1020          * If we block while refilling the free list, somebody may
1021          * create the buffer first ... search the hashes again.
1022          */
1023         write_unlock(&hash_table_lock);
1024         spin_unlock(&lru_list_lock);
1025         refill_freelist(size);
1026         goto repeat;
1027 }
1028 
1029 /* -1 -> no need to flush
1030     0 -> async flush
1031     1 -> sync flush (wait for I/O completation) */
1032 int balance_dirty_state(kdev_t dev)
1033 {
1034         unsigned long dirty, tot, hard_dirty_limit, soft_dirty_limit;
1035         int shortage;
1036 
1037         dirty = size_buffers_type[BUF_DIRTY] >> PAGE_SHIFT;
1038         tot = nr_free_buffer_pages();
1039 
1040         dirty *= 100;
1041         soft_dirty_limit = tot * bdf_prm.b_un.nfract;
1042         hard_dirty_limit = tot * bdf_prm.b_un.nfract_sync;
1043 
1044         /* First, check for the "real" dirty limit. */
1045         if (dirty > soft_dirty_limit) {
1046                 if (dirty > hard_dirty_limit)
1047                         return 1;
1048                 return 0;
1049         }
1050 
1051         /*
1052          * If we are about to get low on free pages and
1053          * cleaning the inactive_dirty pages would help
1054          * fix this, wake up bdflush.
1055          */
1056         shortage = free_shortage();
1057         if (shortage && nr_inactive_dirty_pages > shortage &&
1058                         nr_inactive_dirty_pages > freepages.high)
1059                 return 0;
1060 
1061         return -1;
1062 }
1063 
1064 /*
1065  * if a new dirty buffer is created we need to balance bdflush.
1066  *
1067  * in the future we might want to make bdflush aware of different
1068  * pressures on different devices - thus the (currently unused)
1069  * 'dev' parameter.
1070  */
1071 void balance_dirty(kdev_t dev)
1072 {
1073         int state = balance_dirty_state(dev);
1074 
1075         if (state < 0)
1076                 return;
1077         wakeup_bdflush(state);
1078 }
1079 
1080 static __inline__ void __mark_dirty(struct buffer_head *bh)
1081 {
1082         bh->b_flushtime = jiffies + bdf_prm.b_un.age_buffer;
1083         refile_buffer(bh);
1084 }
1085 
/*
 * Atomic version of mark_buffer_dirty(): it does not call
 * balance_dirty(), so the user must do that by hand as soon as it
 * becomes possible to block.
 *
 * Nothing more is done when atomic_set_buffer_dirty() reports a
 * non-zero result.
 */
void __mark_buffer_dirty(struct buffer_head *bh)
{
        if (atomic_set_buffer_dirty(bh))
                return;

        __mark_dirty(bh);
}
1093 
1094 void mark_buffer_dirty(struct buffer_head *bh)
1095 {
1096         if (!atomic_set_buffer_dirty(bh)) {
1097                 __mark_dirty(bh);
1098                 balance_dirty(bh->b_dev);
1099         }
1100 }
1101 
1102 /*
1103  * A buffer may need to be moved from one buffer list to another
1104  * (e.g. in case it is not shared any more). Handle this.
1105  */
1106 static void __refile_buffer(struct buffer_head *bh)
1107 {
1108         int dispose = BUF_CLEAN;
1109         if (buffer_locked(bh))
1110                 dispose = BUF_LOCKED;
1111         if (buffer_dirty(bh))
1112                 dispose = BUF_DIRTY;
1113         if (buffer_protected(bh))
1114                 dispose = BUF_PROTECTED;
1115         if (dispose != bh->b_list) {
1116                 __remove_from_lru_list(bh, bh->b_list);
1117                 bh->b_list = dispose;
1118                 if (dispose == BUF_CLEAN)
1119                         remove_inode_queue(bh);
1120                 __insert_into_lru_list(bh, dispose);
1121         }
1122 }
1123 
1124 void refile_buffer(struct buffer_head *bh)
1125 {
1126         spin_lock(&lru_list_lock);
1127         __refile_buffer(bh);
1128         spin_unlock(&lru_list_lock);
1129 }
1130 
1131 /*
1132  * Release a buffer head
1133  */
1134 void __brelse(struct buffer_head * buf)
1135 {
1136         if (atomic_read(&buf->b_count)) {
1137                 atomic_dec(&buf->b_count);
1138                 return;
1139         }
1140         printk("VFS: brelse: Trying to free free buffer\n");
1141 }
1142 
1143 /*
1144  * bforget() is like brelse(), except it puts the buffer on the
1145  * free list if it can.. We can NOT free the buffer if:
1146  *  - there are other users of it
1147  *  - it is locked and thus can have active IO
1148  */
1149 void __bforget(struct buffer_head * buf)
1150 {
1151         /* grab the lru lock here to block bdflush. */
1152         spin_lock(&lru_list_lock);
1153         write_lock(&hash_table_lock);
1154         if (!atomic_dec_and_test(&buf->b_count) || buffer_locked(buf))
1155                 goto in_use;
1156         __hash_unlink(buf);
1157         remove_inode_queue(buf);
1158         write_unlock(&hash_table_lock);
1159         __remove_from_lru_list(buf, buf->b_list);
1160         spin_unlock(&lru_list_lock);
1161         put_last_free(buf);
1162         return;
1163 
1164  in_use:
1165         write_unlock(&hash_table_lock);
1166         spin_unlock(&lru_list_lock);
1167 }
1168 
1169 /*
1170  * bread() reads a specified block and returns the buffer that contains
1171  * it. It returns NULL if the block was unreadable.
1172  */
1173 struct buffer_head * bread(kdev_t dev, int block, int size)
1174 {
1175         struct buffer_head * bh;
1176 
1177         bh = getblk(dev, block, size);
1178         if (buffer_uptodate(bh))
1179                 return bh;
1180         ll_rw_block(READ, 1, &bh);
1181         wait_on_buffer(bh);
1182         if (buffer_uptodate(bh))
1183                 return bh;
1184         brelse(bh);
1185         return NULL;
1186 }
1187 
1188 /*
1189  * Note: the caller should wake up the buffer_wait list if needed.
1190  */
1191 static __inline__ void __put_unused_buffer_head(struct buffer_head * bh)
1192 {
1193         if (bh->b_inode)
1194                 BUG();
1195         if (nr_unused_buffer_heads >= MAX_UNUSED_BUFFERS) {
1196                 kmem_cache_free(bh_cachep, bh);
1197         } else {
1198                 bh->b_blocknr = -1;
1199                 init_waitqueue_head(&bh->b_wait);
1200                 nr_unused_buffer_heads++;
1201                 bh->b_next_free = unused_list;
1202                 bh->b_this_page = NULL;
1203                 unused_list = bh;
1204         }
1205 }
1206 
1207 /*
1208  * Reserve NR_RESERVED buffer heads for async IO requests to avoid
1209  * no-buffer-head deadlock.  Return NULL on failure; waiting for
1210  * buffer heads is now handled in create_buffers().
1211  */ 
1212 static struct buffer_head * get_unused_buffer_head(int async)
1213 {
1214         struct buffer_head * bh;
1215 
1216         spin_lock(&unused_list_lock);
1217         if (nr_unused_buffer_heads > NR_RESERVED) {
1218                 bh = unused_list;
1219                 unused_list = bh->b_next_free;
1220                 nr_unused_buffer_heads--;
1221                 spin_unlock(&unused_list_lock);
1222                 return bh;
1223         }
1224         spin_unlock(&unused_list_lock);
1225 
1226         /* This is critical.  We can't swap out pages to get
1227          * more buffer heads, because the swap-out may need
1228          * more buffer-heads itself.  Thus SLAB_BUFFER.
1229          */
1230         if((bh = kmem_cache_alloc(bh_cachep, SLAB_BUFFER)) != NULL) {
1231                 memset(bh, 0, sizeof(*bh));
1232                 init_waitqueue_head(&bh->b_wait);
1233                 return bh;
1234         }
1235 
1236         /*
1237          * If we need an async buffer, use the reserved buffer heads.
1238          */
1239         if (async) {
1240                 spin_lock(&unused_list_lock);
1241                 if (unused_list) {
1242                         bh = unused_list;
1243                         unused_list = bh->b_next_free;
1244                         nr_unused_buffer_heads--;
1245                         spin_unlock(&unused_list_lock);
1246                         return bh;
1247                 }
1248                 spin_unlock(&unused_list_lock);
1249         }
1250 #if 0
1251         /*
1252          * (Pending further analysis ...)
1253          * Ordinary (non-async) requests can use a different memory priority
1254          * to free up pages. Any swapping thus generated will use async
1255          * buffer heads.
1256          */
1257         if(!async &&
1258            (bh = kmem_cache_alloc(bh_cachep, SLAB_KERNEL)) != NULL) {
1259                 memset(bh, 0, sizeof(*bh));
1260                 init_waitqueue_head(&bh->b_wait);
1261                 return bh;
1262         }
1263 #endif
1264 
1265         return NULL;
1266 }
1267 
1268 void set_bh_page (struct buffer_head *bh, struct page *page, unsigned long offset)
1269 {
1270         bh->b_page = page;
1271         if (offset >= PAGE_SIZE)
1272                 BUG();
1273         if (PageHighMem(page))
1274                 /*
1275                  * This catches illegal uses and preserves the offset:
1276                  */
1277                 bh->b_data = (char *)(0 + offset);
1278         else
1279                 bh->b_data = page_address(page) + offset;
1280 }
1281 
1282 /*
1283  * Create the appropriate buffers when given a page for data area and
1284  * the size of each buffer.. Use the bh->b_this_page linked list to
1285  * follow the buffers created.  Return NULL if unable to create more
1286  * buffers.
1287  * The async flag is used to differentiate async IO (paging, swapping)
1288  * from ordinary buffer allocations, and only async requests are allowed
1289  * to sleep waiting for buffer heads. 
1290  */
1291 static struct buffer_head * create_buffers(struct page * page, unsigned long size, int async)
1292 {
1293         struct buffer_head *bh, *head;
1294         long offset;
1295 
1296 try_again:
1297         head = NULL;
1298         offset = PAGE_SIZE;
1299         while ((offset -= size) >= 0) {
1300                 bh = get_unused_buffer_head(async);
1301                 if (!bh)
1302                         goto no_grow;
1303 
1304                 bh->b_dev = B_FREE;  /* Flag as unused */
1305                 bh->b_this_page = head;
1306                 head = bh;
1307 
1308                 bh->b_state = 0;
1309                 bh->b_next_free = NULL;
1310                 bh->b_pprev = NULL;
1311                 atomic_set(&bh->b_count, 0);
1312                 bh->b_size = size;
1313 
1314                 set_bh_page(bh, page, offset);
1315 
1316                 bh->b_list = BUF_CLEAN;
1317                 bh->b_end_io = NULL;
1318         }
1319         return head;
1320 /*
1321  * In case anything failed, we just free everything we got.
1322  */
1323 no_grow:
1324         if (head) {
1325                 spin_lock(&unused_list_lock);
1326                 do {
1327                         bh = head;
1328                         head = head->b_this_page;
1329                         __put_unused_buffer_head(bh);
1330                 } while (head);
1331                 spin_unlock(&unused_list_lock);
1332 
1333                 /* Wake up any waiters ... */
1334                 wake_up(&buffer_wait);
1335         }
1336 
1337         /*
1338          * Return failure for non-async IO requests.  Async IO requests
1339          * are not allowed to fail, so we have to wait until buffer heads
1340          * become available.  But we don't want tasks sleeping with 
1341          * partially complete buffers, so all were released above.
1342          */
1343         if (!async)
1344                 return NULL;
1345 
1346         /* We're _really_ low on memory. Now we just
1347          * wait for old buffer heads to become free due to
1348          * finishing IO.  Since this is an async request and
1349          * the reserve list is empty, we're sure there are 
1350          * async buffer heads in use.
1351          */
1352         run_task_queue(&tq_disk);
1353 
1354         /* 
1355          * Set our state for sleeping, then check again for buffer heads.
1356          * This ensures we won't miss a wake_up from an interrupt.
1357          */
1358         wait_event(buffer_wait, nr_unused_buffer_heads >= MAX_BUF_PER_PAGE);
1359         goto try_again;
1360 }
1361 
1362 static void unmap_buffer(struct buffer_head * bh)
1363 {
1364         if (buffer_mapped(bh)) {
1365                 mark_buffer_clean(bh);
1366                 wait_on_buffer(bh);
1367                 clear_bit(BH_Uptodate, &bh->b_state);
1368                 clear_bit(BH_Mapped, &bh->b_state);
1369                 clear_bit(BH_Req, &bh->b_state);
1370                 clear_bit(BH_New, &bh->b_state);
1371         }
1372 }
1373 
1374 /*
1375  * We don't have to release all buffers here, but
1376  * we have to be sure that no dirty buffer is left
1377  * and no IO is going on (no buffer is locked), because
1378  * we have truncated the file and are going to free the
1379  * blocks on-disk..
1380  */
1381 int block_flushpage(struct page *page, unsigned long offset)
1382 {
1383         struct buffer_head *head, *bh, *next;
1384         unsigned int curr_off = 0;
1385 
1386         if (!PageLocked(page))
1387                 BUG();
1388         if (!page->buffers)
1389                 return 1;
1390 
1391         head = page->buffers;
1392         bh = head;
1393         do {
1394                 unsigned int next_off = curr_off + bh->b_size;
1395                 next = bh->b_this_page;
1396 
1397                 /*
1398                  * is this block fully flushed?
1399                  */
1400                 if (offset <= curr_off)
1401                         unmap_buffer(bh);
1402                 curr_off = next_off;
1403                 bh = next;
1404         } while (bh != head);
1405 
1406         /*
1407          * subtle. We release buffer-heads only if this is
1408          * the 'final' flushpage. We have invalidated the get_block
1409          * cached value unconditionally, so real IO is not
1410          * possible anymore.
1411          *
1412          * If the free doesn't work out, the buffers can be
1413          * left around - they just turn into anonymous buffers
1414          * instead.
1415          */
1416         if (!offset) {
1417                 if (!try_to_free_buffers(page, 0)) {
1418                         atomic_inc(&buffermem_pages);
1419                         return 0;
1420                 }
1421         }
1422 
1423         return 1;
1424 }
1425 
1426 static void create_empty_buffers(struct page *page, kdev_t dev, unsigned long blocksize)
1427 {
1428         struct buffer_head *bh, *head, *tail;
1429 
1430         head = create_buffers(page, blocksize, 1);
1431         if (page->buffers)
1432                 BUG();
1433 
1434         bh = head;
1435         do {
1436                 bh->b_dev = dev;
1437                 bh->b_blocknr = 0;
1438                 bh->b_end_io = NULL;
1439                 tail = bh;
1440                 bh = bh->b_this_page;
1441         } while (bh);
1442         tail->b_this_page = head;
1443         page->buffers = head;
1444         page_cache_get(page);
1445 }
1446 
1447 /*
1448  * We are taking a block for data and we don't want any output from any
1449  * buffer-cache aliases starting from return from that function and
1450  * until the moment when something will explicitly mark the buffer
1451  * dirty (hopefully that will not happen until we will free that block ;-)
1452  * We don't even need to mark it not-uptodate - nobody can expect
1453  * anything from a newly allocated buffer anyway. We used to used
1454  * unmap_buffer() for such invalidation, but that was wrong. We definitely
1455  * don't want to mark the alias unmapped, for example - it would confuse
1456  * anyone who might pick it with bread() afterwards...
1457  */
1458 
1459 static void unmap_underlying_metadata(struct buffer_head * bh)
1460 {
1461         struct buffer_head *old_bh;
1462 
1463         old_bh = get_hash_table(bh->b_dev, bh->b_blocknr, bh->b_size);
1464         if (old_bh) {
1465                 mark_buffer_clean(old_bh);
1466                 wait_on_buffer(old_bh);
1467                 clear_bit(BH_Req, &old_bh->b_state);
1468                 /* Here we could run brelse or bforget. We use
1469                    bforget because it will try to put the buffer
1470                    in the freelist. */
1471                 __bforget(old_bh);
1472         }
1473 }
1474 
1475 /*
1476  * NOTE! All mapped/uptodate combinations are valid:
1477  *
1478  *      Mapped  Uptodate        Meaning
1479  *
1480  *      No      No              "unknown" - must do get_block()
1481  *      No      Yes             "hole" - zero-filled
1482  *      Yes     No              "allocated" - allocated on disk, not read in
1483  *      Yes     Yes             "valid" - allocated and up-to-date in memory.
1484  *
1485  * "Dirty" is valid only with the last case (mapped+uptodate).
1486  */
1487 
1488 /*
1489  * block_write_full_page() is SMP-safe - currently it's still
1490  * being called with the kernel lock held, but the code is ready.
1491  */
1492 static int __block_write_full_page(struct inode *inode, struct page *page, get_block_t *get_block)
1493 {
1494         int err, i;
1495         unsigned long block;
1496         struct buffer_head *bh, *head;
1497 
1498         if (!PageLocked(page))
1499                 BUG();
1500 
1501         if (!page->buffers)
1502                 create_empty_buffers(page, inode->i_dev, inode->i_sb->s_blocksize);
1503         head = page->buffers;
1504 
1505         block = page->index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
1506 
1507         bh = head;
1508         i = 0;
1509 
1510         /* Stage 1: make sure we have all the buffers mapped! */
1511         do {
1512                 /*
1513                  * If the buffer isn't up-to-date, we can't be sure
1514                  * that the buffer has been initialized with the proper
1515                  * block number information etc..
1516                  *
1517                  * Leave it to the low-level FS to make all those
1518                  * decisions (block #0 may actually be a valid block)
1519                  */
1520                 if (!buffer_mapped(bh)) {
1521                         err = get_block(inode, block, bh, 1);
1522                         if (err)
1523                                 goto out;
1524                         if (buffer_new(bh))
1525                                 unmap_underlying_metadata(bh);
1526                 }
1527                 bh = bh->b_this_page;
1528                 block++;
1529         } while (bh != head);
1530 
1531         /* Stage 2: lock the buffers, mark them clean */
1532         do {
1533                 lock_buffer(bh);
1534                 bh->b_end_io = end_buffer_io_async;
1535                 atomic_inc(&bh->b_count);
1536                 set_bit(BH_Uptodate, &bh->b_state);
1537                 clear_bit(BH_Dirty, &bh->b_state);
1538                 bh = bh->b_this_page;
1539         } while (bh != head);
1540 
1541         /* Stage 3: submit the IO */
1542         do {
1543                 submit_bh(WRITE, bh);
1544                 bh = bh->b_this_page;           
1545         } while (bh != head);
1546 
1547         /* Done - end_buffer_io_async will unlock */
1548         SetPageUptodate(page);
1549         return 0;
1550 
1551 out:
1552         ClearPageUptodate(page);
1553         UnlockPage(page);
1554         return err;
1555 }
1556 
1557 static int __block_prepare_write(struct inode *inode, struct page *page,
1558                 unsigned from, unsigned to, get_block_t *get_block)
1559 {
1560         unsigned block_start, block_end;
1561         unsigned long block;
1562         int err = 0;
1563         unsigned blocksize, bbits;
1564         struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;
1565         char *kaddr = kmap(page);
1566 
1567         blocksize = inode->i_sb->s_blocksize;
1568         if (!page->buffers)
1569                 create_empty_buffers(page, inode->i_dev, blocksize);
1570         head = page->buffers;
1571 
1572         bbits = inode->i_sb->s_blocksize_bits;
1573         block = page->index << (PAGE_CACHE_SHIFT - bbits);
1574 
1575         for(bh = head, block_start = 0; bh != head || !block_start;
1576             block++, block_start=block_end, bh = bh->b_this_page) {
1577                 if (!bh)
1578                         BUG();
1579                 block_end = block_start+blocksize;
1580                 if (block_end <= from)
1581                         continue;
1582                 if (block_start >= to)
1583                         break;
1584                 if (!buffer_mapped(bh)) {
1585                         err = get_block(inode, block, bh, 1);
1586                         if (err)
1587                                 goto out;
1588                         if (buffer_new(bh)) {
1589                                 unmap_underlying_metadata(bh);
1590                                 if (Page_Uptodate(page)) {
1591                                         set_bit(BH_Uptodate, &bh->b_state);
1592                                         continue;
1593                                 }
1594                                 if (block_end > to)
1595                                         memset(kaddr+to, 0, block_end-to);
1596                                 if (block_start < from)
1597                                         memset(kaddr+block_start, 0, from-block_start);
1598                                 if (block_end > to || block_start < from)
1599                                         flush_dcache_page(page);
1600                                 continue;
1601                         }
1602                 }
1603                 if (Page_Uptodate(page)) {
1604                         set_bit(BH_Uptodate, &bh->b_state);
1605                         continue; 
1606                 }
1607                 if (!buffer_uptodate(bh) &&
1608                      (block_start < from || block_end > to)) {
1609                         ll_rw_block(READ, 1, &bh);
1610                         *wait_bh++=bh;
1611                 }
1612         }
1613         /*
1614          * If we issued read requests - let them complete.
1615          */
1616         while(wait_bh > wait) {
1617                 wait_on_buffer(*--wait_bh);
1618                 err = -EIO;
1619                 if (!buffer_uptodate(*wait_bh))
1620                         goto out;
1621         }
1622         return 0;
1623 out:
1624         return err;
1625 }
1626 
1627 static int __block_commit_write(struct inode *inode, struct page *page,
1628                 unsigned from, unsigned to)
1629 {
1630         unsigned block_start, block_end;
1631         int partial = 0, need_balance_dirty = 0;
1632         unsigned blocksize;
1633         struct buffer_head *bh, *head;
1634 
1635         blocksize = inode->i_sb->s_blocksize;
1636 
1637         for(bh = head = page->buffers, block_start = 0;
1638             bh != head || !block_start;
1639             block_start=block_end, bh = bh->b_this_page) {
1640                 block_end = block_start + blocksize;
1641                 if (block_end <= from || block_start >= to) {
1642                         if (!buffer_uptodate(bh))
1643                                 partial = 1;
1644                 } else {
1645                         set_bit(BH_Uptodate, &bh->b_state);
1646                         if (!atomic_set_buffer_dirty(bh)) {
1647                                 __mark_dirty(bh);
1648                                 buffer_insert_inode_queue(bh, inode);
1649                                 need_balance_dirty = 1;
1650                         }
1651                 }
1652         }
1653 
1654         if (need_balance_dirty)
1655                 balance_dirty(bh->b_dev);
1656         /*
1657          * is this a partial write that happened to make all buffers
1658          * uptodate then we can optimize away a bogus readpage() for
1659          * the next read(). Here we 'discover' wether the page went
1660          * uptodate as a result of this (potentially partial) write.
1661          */
1662         if (!partial)
1663                 SetPageUptodate(page);
1664         return 0;
1665 }
1666 
1667 /*
1668  * Generic "read page" function for block devices that have the normal
1669  * get_block functionality. This is most of the block device filesystems.
1670  * Reads the page asynchronously --- the unlock_buffer() and
1671  * mark_buffer_uptodate() functions propagate buffer state into the
1672  * page struct once IO has completed.
1673  */
1674 int block_read_full_page(struct page *page, get_block_t *get_block)
1675 {
1676         struct inode *inode = page->mapping->host;
1677         unsigned long iblock, lblock;
1678         struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
1679         unsigned int blocksize, blocks;
1680         int nr, i;
1681 
1682         if (!PageLocked(page))
1683                 PAGE_BUG(page);
1684         blocksize = inode->i_sb->s_blocksize;
1685         if (!page->buffers)
1686                 create_empty_buffers(page, inode->i_dev, blocksize);
1687         head = page->buffers;
1688 
1689         blocks = PAGE_CACHE_SIZE >> inode->i_sb->s_blocksize_bits;
1690         iblock = page->index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
1691         lblock = (inode->i_size+blocksize-1) >> inode->i_sb->s_blocksize_bits;
1692         bh = head;
1693         nr = 0;
1694         i = 0;
1695 
1696         do {
1697                 if (buffer_uptodate(bh))
1698                         continue;
1699 
1700                 if (!buffer_mapped(bh)) {
1701                         if (iblock < lblock) {
1702                                 if (get_block(inode, iblock, bh, 0))
1703                                         continue;
1704                         }
1705                         if (!buffer_mapped(bh)) {
1706                                 memset(kmap(page) + i*blocksize, 0, blocksize);
1707                                 flush_dcache_page(page);
1708                                 kunmap(page);
1709                                 set_bit(BH_Uptodate, &bh->b_state);
1710                                 continue;
1711                         }
1712                         /* get_block() might have updated the buffer synchronously */
1713                         if (buffer_uptodate(bh))
1714                                 continue;
1715                 }
1716 
1717                 arr[nr] = bh;
1718                 nr++;
1719         } while (i++, iblock++, (bh = bh->b_this_page) != head);
1720 
1721         if (!nr) {
1722                 /*
1723                  * all buffers are uptodate - we can set the page
1724                  * uptodate as well.
1725                  */
1726                 SetPageUptodate(page);
1727                 UnlockPage(page);
1728                 return 0;
1729         }
1730 
1731         /* Stage two: lock the buffers */
1732         for (i = 0; i < nr; i++) {
1733                 struct buffer_head * bh = arr[i];
1734                 lock_buffer(bh);
1735                 bh->b_end_io = end_buffer_io_async;
1736                 atomic_inc(&bh->b_count);
1737         }
1738 
1739         /* Stage 3: start the IO */
1740         for (i = 0; i < nr; i++)
1741                 submit_bh(READ, arr[i]);
1742 
1743         return 0;
1744 }
1745 
1746 /*
1747  * For moronic filesystems that do not allow holes in file.
1748  * We may have to extend the file.
1749  */
1750 
1751 int cont_prepare_write(struct page *page, unsigned offset, unsigned to, get_block_t *get_block, unsigned long *bytes)
1752 {
1753         struct address_space *mapping = page->mapping;
1754         struct inode *inode = mapping->host;
1755         struct page *new_page;
1756         unsigned long pgpos;
1757         long status;
1758         unsigned zerofrom;
1759         unsigned blocksize = inode->i_sb->s_blocksize;
1760         char *kaddr;
1761 
1762         while(page->index > (pgpos = *bytes>>PAGE_CACHE_SHIFT)) {
1763                 status = -ENOMEM;
1764                 new_page = grab_cache_page(mapping, pgpos);
1765                 if (!new_page)
1766                         goto out;
1767                 /* we might sleep */
1768                 if (*bytes>>PAGE_CACHE_SHIFT != pgpos) {
1769                         UnlockPage(new_page);
1770                         page_cache_release(new_page);
1771                         continue;
1772                 }
1773                 zerofrom = *bytes & ~PAGE_CACHE_MASK;
1774                 if (zerofrom & (blocksize-1)) {
1775                         *bytes |= (blocksize-1);
1776                         (*bytes)++;
1777                 }
1778                 status = __block_prepare_write(inode, new_page, zerofrom,
1779                                                 PAGE_CACHE_SIZE, get_block);
1780                 if (status)
1781                         goto out_unmap;
1782                 kaddr = page_address(new_page);
1783                 memset(kaddr+zerofrom, 0, PAGE_CACHE_SIZE-zerofrom);
1784                 flush_dcache_page(new_page);
1785                 __block_commit_write(inode, new_page, zerofrom, PAGE_CACHE_SIZE);
1786                 kunmap(new_page);
1787                 UnlockPage(new_page);
1788                 page_cache_release(new_page);
1789         }
1790 
1791         if (page->index < pgpos) {
1792                 /* completely inside the area */
1793                 zerofrom = offset;
1794         } else {
1795                 /* page covers the boundary, find the boundary offset */
1796                 zerofrom = *bytes & ~PAGE_CACHE_MASK;
1797 
1798                 /* if we will expand the thing last block will be filled */
1799                 if (to > zerofrom && (zerofrom & (blocksize-1))) {
1800                         *bytes |= (blocksize-1);
1801                         (*bytes)++;
1802                 }
1803 
1804                 /* starting below the boundary? Nothing to zero out */
1805                 if (offset <= zerofrom)
1806                         zerofrom = offset;
1807         }
1808         status = __block_prepare_write(inode, page, zerofrom, to, get_block);
1809         if (status)
1810                 goto out1;
1811         kaddr = page_address(page);
1812         if (zerofrom < offset) {
1813                 memset(kaddr+zerofrom, 0, offset-zerofrom);
1814                 flush_dcache_page(page);
1815                 __block_commit_write(inode, page, zerofrom, offset);
1816         }
1817         return 0;
1818 out1:
1819         ClearPageUptodate(page);
1820         kunmap(page);
1821         return status;
1822 
1823 out_unmap:
1824         ClearPageUptodate(new_page);
1825         kunmap(new_page);
1826         UnlockPage(new_page);
1827         page_cache_release(new_page);
1828 out:
1829         return status;
1830 }
1831 
1832 int block_prepare_write(struct page *page, unsigned from, unsigned to,
1833                         get_block_t *get_block)
1834 {
1835         struct inode *inode = page->mapping->host;
1836         int err = __block_prepare_write(inode, page, from, to, get_block);
1837         if (err) {
1838                 ClearPageUptodate(page);
1839                 kunmap(page);
1840         }
1841         return err;
1842 }
1843 
1844 int generic_commit_write(struct file *file, struct page *page,
1845                 unsigned from, unsigned to)
1846 {
1847         struct inode *inode = page->mapping->host;
1848         loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
1849         __block_commit_write(inode,page,from,to);
1850         kunmap(page);
1851         if (pos > inode->i_size) {
1852                 inode->i_size = pos;
1853                 mark_inode_dirty(inode);
1854         }
1855         return 0;
1856 }
1857 
1858 int block_truncate_page(struct address_space *mapping, loff_t from, get_block_t *get_block)
1859 {
1860         unsigned long index = from >> PAGE_CACHE_SHIFT;
1861         unsigned offset = from & (PAGE_CACHE_SIZE-1);
1862         unsigned blocksize, iblock, length, pos;
1863         struct inode *inode = mapping->host;
1864         struct page *page;
1865         struct buffer_head *bh;
1866         int err;
1867 
1868         blocksize = inode->i_sb->s_blocksize;
1869         length = offset & (blocksize - 1);
1870 
1871         /* Block boundary? Nothing to do */
1872         if (!length)
1873                 return 0;
1874 
1875         length = blocksize - length;
1876         iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
1877         
1878         page = grab_cache_page(mapping, index);
1879         err = PTR_ERR(page);
1880         if (IS_ERR(page))
1881                 goto out;
1882 
1883         if (!page->buffers)
1884                 create_empty_buffers(page, inode->i_dev, blocksize);
1885 
1886         /* Find the buffer that contains "offset" */
1887         bh = page->buffers;
1888         pos = blocksize;
1889         while (offset >= pos) {
1890                 bh = bh->b_this_page;
1891                 iblock++;
1892                 pos += blocksize;
1893         }
1894 
1895         err = 0;
1896         if (!buffer_mapped(bh)) {
1897                 /* Hole? Nothing to do */
1898                 if (buffer_uptodate(bh))
1899                         goto unlock;
1900                 get_block(inode, iblock, bh, 0);
1901                 /* Still unmapped? Nothing to do */
1902                 if (!buffer_mapped(bh))
1903                         goto unlock;
1904         }
1905 
1906         /* Ok, it's mapped. Make sure it's up-to-date */
1907         if (Page_Uptodate(page))
1908                 set_bit(BH_Uptodate, &bh->b_state);
1909 
1910         if (!buffer_uptodate(bh)) {
1911                 err = -EIO;
1912                 ll_rw_block(READ, 1, &bh);
1913                 wait_on_buffer(bh);
1914                 /* Uhhuh. Read error. Complain and punt. */
1915                 if (!buffer_uptodate(bh))
1916                         goto unlock;
1917         }
1918 
1919         memset(kmap(page) + offset, 0, length);
1920         flush_dcache_page(page);
1921         kunmap(page);
1922 
1923         __mark_buffer_dirty(bh);
1924         err = 0;
1925 
1926 unlock:
1927         UnlockPage(page);
1928         page_cache_release(page);
1929 out:
1930         return err;
1931 }
1932 
1933 int block_write_full_page(struct page *page, get_block_t *get_block)
1934 {
1935         struct inode *inode = page->mapping->host;
1936         unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT;
1937         unsigned offset;
1938         int err;
1939 
1940         /* easy case */
1941         if (page->index < end_index)
1942                 return __block_write_full_page(inode, page, get_block);
1943 
1944         /* things got complicated... */
1945         offset = inode->i_size & (PAGE_CACHE_SIZE-1);
1946         /* OK, are we completely out? */
1947         if (page->index >= end_index+1 || !offset) {
1948                 UnlockPage(page);
1949                 return -EIO;
1950         }
1951 
1952         /* Sigh... will have to work, then... */
1953         err = __block_prepare_write(inode, page, 0, offset, get_block);
1954         if (!err) {
1955                 memset(page_address(page) + offset, 0, PAGE_CACHE_SIZE - offset);
1956                 flush_dcache_page(page);
1957                 __block_commit_write(inode,page,0,offset);
1958 done:
1959                 kunmap(page);
1960                 UnlockPage(page);
1961                 return err;
1962         }
1963         ClearPageUptodate(page);
1964         goto done;
1965 }
1966 
1967 int generic_block_bmap(struct address_space *mapping, long block, get_block_t *get_block)
1968 {
1969         struct buffer_head tmp;
1970         struct inode *inode = mapping->host;
1971         tmp.b_state = 0;
1972         tmp.b_blocknr = 0;
1973         get_block(inode, block, &tmp, 0);
1974         return tmp.b_blocknr;
1975 }
1976 
1977 /*
1978  * IO completion routine for a buffer_head being used for kiobuf IO: we
1979  * can't dispatch the kiobuf callback until io_count reaches 0.  
1980  */
1981 
1982 static void end_buffer_io_kiobuf(struct buffer_head *bh, int uptodate)
1983 {
1984         struct kiobuf *kiobuf;
1985         
1986         mark_buffer_uptodate(bh, uptodate);
1987 
1988         kiobuf = bh->b_private;
1989         unlock_buffer(bh);
1990         end_kio_request(kiobuf, uptodate);
1991 }
1992 
1993 
1994 /*
1995  * For brw_kiovec: submit a set of buffer_head temporary IOs and wait
1996  * for them to complete.  Clean up the buffer_heads afterwards.  
1997  */
1998 
1999 static int wait_kio(int rw, int nr, struct buffer_head *bh[], int size)
2000 {
2001         int iosize;
2002         int i;
2003         struct buffer_head *tmp;
2004 
2005 
2006         iosize = 0;
2007         spin_lock(&unused_list_lock);
2008 
2009         for (i = nr; --i >= 0; ) {
2010                 iosize += size;
2011                 tmp = bh[i];
2012                 if (buffer_locked(tmp)) {
2013                         spin_unlock(&unused_list_lock);
2014                         wait_on_buffer(tmp);
2015                         spin_lock(&unused_list_lock);
2016                 }
2017                 
2018                 if (!buffer_uptodate(tmp)) {
2019                         /* We are traversing bh'es in reverse order so
2020                            clearing iosize on error calculates the
2021                            amount of IO before the first error. */
2022                         iosize = 0;
2023                 }
2024                 __put_unused_buffer_head(tmp);
2025         }
2026         
2027         spin_unlock(&unused_list_lock);
2028 
2029         return iosize;
2030 }
2031 
2032 /*
2033  * Start I/O on a physical range of kernel memory, defined by a vector
2034  * of kiobuf structs (much like a user-space iovec list).
2035  *
2036  * The kiobuf must already be locked for IO.  IO is submitted
2037  * asynchronously: you need to check page->locked, page->uptodate, and
2038  * maybe wait on page->wait.
2039  *
2040  * It is up to the caller to make sure that there are enough blocks
2041  * passed in to completely map the iobufs to disk.
2042  */
2043 
2044 int brw_kiovec(int rw, int nr, struct kiobuf *iovec[], 
2045                kdev_t dev, unsigned long b[], int size)
2046 {
2047         int             err;
2048         int             length;
2049         int             transferred;
2050         int             i;
2051         int             bufind;
2052         int             pageind;
2053         int             bhind;
2054         int             offset;
2055         unsigned long   blocknr;
2056         struct kiobuf * iobuf = NULL;
2057         struct page *   map;
2058         struct buffer_head *tmp, *bh[KIO_MAX_SECTORS];
2059 
2060         if (!nr)
2061                 return 0;
2062         
2063         /* 
2064          * First, do some alignment and validity checks 
2065          */
2066         for (i = 0; i < nr; i++) {
2067                 iobuf = iovec[i];
2068                 if ((iobuf->offset & (size-1)) ||
2069                     (iobuf->length & (size-1)))
2070                         return -EINVAL;
2071                 if (!iobuf->nr_pages)
2072                         panic("brw_kiovec: iobuf not initialised");
2073         }
2074 
2075         /* 
2076          * OK to walk down the iovec doing page IO on each page we find. 
2077          */
2078         bufind = bhind = transferred = err = 0;
2079         for (i = 0; i < nr; i++) {
2080                 iobuf = iovec[i];
2081                 offset = iobuf->offset;
2082                 length = iobuf->length;
2083                 iobuf->errno = 0;
2084                 
2085                 for (pageind = 0; pageind < iobuf->nr_pages; pageind++) {
2086                         map  = iobuf->maplist[pageind];
2087                         if (!map) {
2088                                 err = -EFAULT;
2089                                 goto error;
2090                         }
2091                         
2092                         while (length > 0) {
2093                                 blocknr = b[bufind++];
2094                                 tmp = get_unused_buffer_head(0);
2095                                 if (!tmp) {
2096                                         err = -ENOMEM;
2097                                         goto error;
2098                                 }
2099                                 
2100                                 tmp->b_dev = B_FREE;
2101                                 tmp->b_size = size;
2102                                 set_bh_page(tmp, map, offset);
2103                                 tmp->b_this_page = tmp;
2104 
2105                                 init_buffer(tmp, end_buffer_io_kiobuf, iobuf);
2106                                 tmp->b_dev = dev;
2107                                 tmp->b_blocknr = blocknr;
2108                                 tmp->b_state = (1 << BH_Mapped) | (1 << BH_Lock) | (1 << BH_Req);
2109 
2110                                 if (rw == WRITE) {
2111                                         set_bit(BH_Uptodate, &tmp->b_state);
2112                                         clear_bit(BH_Dirty, &tmp->b_state);
2113                                 }
2114 
2115                                 bh[bhind++] = tmp;
2116                                 length -= size;
2117                                 offset += size;
2118 
2119                                 atomic_inc(&iobuf->io_count);
2120 
2121                                 submit_bh(rw, tmp);
2122                                 /* 
2123                                  * Wait for IO if we have got too much 
2124                                  */
2125                                 if (bhind >= KIO_MAX_SECTORS) {
2126                                         err = wait_kio(rw, bhind, bh, size);
2127                                         if (err >= 0)
2128                                                 transferred += err;
2129                                         else
2130                                                 goto finished;
2131                                         bhind = 0;
2132                                 }
2133                                 
2134                                 if (offset >= PAGE_SIZE) {
2135                                         offset = 0;
2136                                         break;
2137                                 }
2138                         } /* End of block loop */
2139                 } /* End of page loop */                
2140         } /* End of iovec loop */
2141 
2142         /* Is there any IO still left to submit? */
2143         if (bhind) {
2144                 err = wait_kio(rw, bhind, bh, size);
2145                 if (err >= 0)
2146                         transferred += err;
2147                 else
2148                         goto finished;
2149         }
2150 
2151  finished:
2152         if (transferred)
2153                 return transferred;
2154         return err;
2155 
2156  error:
2157         /* We got an error allocating the bh'es.  Just free the current
2158            buffer_heads and exit. */
2159         spin_lock(&unused_list_lock);
2160         for (i = bhind; --i >= 0; ) {
2161                 __put_unused_buffer_head(bh[i]);
2162         }
2163         spin_unlock(&unused_list_lock);
2164         goto finished;
2165 }
2166 
2167 /*
2168  * Start I/O on a page.
2169  * This function expects the page to be locked and may return
2170  * before I/O is complete. You then have to check page->locked,
2171  * page->uptodate, and maybe wait on page->wait.
2172  *
2173  * brw_page() is SMP-safe, although it's being called with the
2174  * kernel lock held - but the code is ready.
2175  *
2176  * FIXME: we need a swapper_inode->get_block function to remove
2177  *        some of the bmap kludges and interface ugliness here.
2178  */
2179 int brw_page(int rw, struct page *page, kdev_t dev, int b[], int size)
2180 {
2181         struct buffer_head *head, *bh;
2182 
2183         if (!PageLocked(page))
2184                 panic("brw_page: page not locked for I/O");
2185 
2186         if (!page->buffers)
2187                 create_empty_buffers(page, dev, size);
2188         head = bh = page->buffers;
2189 
2190         /* Stage 1: lock all the buffers */
2191         do {
2192                 lock_buffer(bh);
2193                 bh->b_blocknr = *(b++);
2194                 set_bit(BH_Mapped, &bh->b_state);
2195                 bh->b_end_io = end_buffer_io_async;
2196                 atomic_inc(&bh->b_count);
2197                 bh = bh->b_this_page;
2198         } while (bh != head);
2199 
2200         /* Stage 2: start the IO */
2201         do {
2202                 submit_bh(rw, bh);
2203                 bh = bh->b_this_page;
2204         } while (bh != head);
2205         return 0;
2206 }
2207 
2208 int block_symlink(struct inode *inode, const char *symname, int len)
2209 {
2210         struct address_space *mapping = inode->i_mapping;
2211         struct page *page = grab_cache_page(mapping, 0);
2212         int err = -ENOMEM;
2213         char *kaddr;
2214 
2215         if (!page)
2216                 goto fail;
2217         err = mapping->a_ops->prepare_write(NULL, page, 0, len-1);
2218         if (err)
2219                 goto fail_map;
2220         kaddr = page_address(page);
2221         memcpy(kaddr, symname, len-1);
2222         mapping->a_ops->commit_write(NULL, page, 0, len-1);
2223         /*
2224          * Notice that we are _not_ going to block here - end of page is
2225          * unmapped, so this will only try to map the rest of page, see
2226          * that it is unmapped (typically even will not look into inode -
2227          * ->i_size will be enough for everything) and zero it out.
2228          * OTOH it's obviously correct and should make the page up-to-date.
2229          */
2230         err = mapping->a_ops->readpage(NULL, page);
2231         wait_on_page(page);
2232         page_cache_release(page);
2233         if (err < 0)
2234                 goto fail;
2235         mark_inode_dirty(inode);
2236         return 0;
2237 fail_map:
2238         UnlockPage(page);
2239         page_cache_release(page);
2240 fail:
2241         return err;
2242 }
2243 
2244 /*
2245  * Try to increase the number of buffers available: the size argument
2246  * is used to determine what kind of buffers we want.
2247  */
2248 static int grow_buffers(int size)
2249 {
2250         struct page * page;
2251         struct buffer_head *bh, *tmp;
2252         struct buffer_head * insert_point;
2253         int isize;
2254 
2255         if ((size & 511) || (size > PAGE_SIZE)) {
2256                 printk("VFS: grow_buffers: size = %d\n",size);
2257                 return 0;
2258         }
2259 
2260         page = alloc_page(GFP_BUFFER);
2261         if (!page)
2262                 goto out;
2263         LockPage(page);
2264         bh = create_buffers(page, size, 0);
2265         if (!bh)
2266                 goto no_buffer_head;
2267 
2268         isize = BUFSIZE_INDEX(size);
2269 
2270         spin_lock(&free_list[isize].lock);
2271         insert_point = free_list[isize].list;
2272         tmp = bh;
2273         while (1) {
2274                 if (insert_point) {
2275                         tmp->b_next_free = insert_point->b_next_free;
2276                         tmp->b_prev_free = insert_point;
2277                         insert_point->b_next_free->b_prev_free = tmp;
2278                         insert_point->b_next_free = tmp;
2279                 } else {
2280                         tmp->b_prev_free = tmp;
2281                         tmp->b_next_free = tmp;
2282                 }
2283                 insert_point = tmp;
2284                 if (tmp->b_this_page)
2285                         tmp = tmp->b_this_page;
2286                 else
2287                         break;
2288         }
2289         tmp->b_this_page = bh;
2290         free_list[isize].list = bh;
2291         spin_unlock(&free_list[isize].lock);
2292 
2293         page->buffers = bh;
2294         page->flags &= ~(1 << PG_referenced);
2295         lru_cache_add(page);
2296         UnlockPage(page);
2297         atomic_inc(&buffermem_pages);
2298         return 1;
2299 
2300 no_buffer_head:
2301         UnlockPage(page);
2302         page_cache_release(page);
2303 out:
2304         return 0;
2305 }
2306 
2307 /*
2308  * Sync all the buffers on one page..
2309  *
2310  * If we have old buffers that are locked, we'll
2311  * wait on them, but we won't wait on the new ones
2312  * we're writing out now.
2313  *
2314  * This all is required so that we can free up memory
2315  * later.
2316  *
2317  * Wait:
2318  *      0 - no wait (this does not get called - see try_to_free_buffers below)
2319  *      1 - start IO for dirty buffers
2320  *      2 - wait for completion of locked buffers
2321  */
2322 static void sync_page_buffers(struct buffer_head *bh, int wait)
2323 {
2324         struct buffer_head * tmp = bh;
2325 
2326         do {
2327                 struct buffer_head *p = tmp;
2328                 tmp = tmp->b_this_page;
2329                 if (buffer_locked(p)) {
2330                         if (wait > 1)
2331                                 __wait_on_buffer(p);
2332                 } else if (buffer_dirty(p))
2333                         ll_rw_block(WRITE, 1, &p);
2334         } while (tmp != bh);
2335 }
2336 
/*
 * Can the buffer be thrown out?
 *
 * buffer_busy() evaluates to non-zero while the buffer is referenced
 * (b_count != 0) or has any of the dirty, locked or protected state
 * bits set; a buffer may only be discarded when it evaluates to zero.
 * Note that the argument is evaluated twice.
 */
#define BUFFER_BUSY_BITS \
	((1<<BH_Protected) | (1<<BH_Lock) | (1<<BH_Dirty))
#define buffer_busy(bh) \
	(((bh)->b_state & BUFFER_BUSY_BITS) | atomic_read(&(bh)->b_count))
2342 
2343 /*
2344  * try_to_free_buffers() checks if all the buffers on this particular page
2345  * are unused, and free's the page if so.
2346  *
2347  * Wake up bdflush() if this fails - if we're running low on memory due
2348  * to dirty buffers, we need to flush them out as quickly as possible.
2349  *
2350  * NOTE: There are quite a number of ways that threads of control can
2351  *       obtain a reference to a buffer head within a page.  So we must
2352  *       lock out all of these paths to cleanly toss the page.
2353  */
2354 int try_to_free_buffers(struct page * page, int wait)
2355 {
2356         struct buffer_head * tmp, * bh = page->buffers;
2357         int index = BUFSIZE_INDEX(bh->b_size);
2358         int loop = 0;
2359 
2360 cleaned_buffers_try_again:
2361         spin_lock(&lru_list_lock);
2362         write_lock(&hash_table_lock);
2363         spin_lock(&free_list[index].lock);
2364         tmp = bh;
2365         do {
2366                 struct buffer_head *p = tmp;
2367 
2368                 tmp = tmp->b_this_page;
2369                 if (buffer_busy(p))
2370                         goto busy_buffer_page;
2371         } while (tmp != bh);
2372 
2373         spin_lock(&unused_list_lock);
2374         tmp = bh;
2375         do {
2376                 struct buffer_head * p = tmp;
2377                 tmp = tmp->b_this_page;
2378 
2379                 /* The buffer can be either on the regular
2380                  * queues or on the free list..
2381                  */
2382                 if (p->b_dev != B_FREE) {
2383                         remove_inode_queue(p);
2384                         __remove_from_queues(p);
2385                 } else
2386                         __remove_from_free_list(p, index);
2387                 __put_unused_buffer_head(p);
2388         } while (tmp != bh);
2389         spin_unlock(&unused_list_lock);
2390 
2391         /* Wake up anyone waiting for buffer heads */
2392         wake_up(&buffer_wait);
2393 
2394         /* And free the page */
2395         page->buffers = NULL;
2396         page_cache_release(page);
2397         spin_unlock(&free_list[index].lock);
2398         write_unlock(&hash_table_lock);
2399         spin_unlock(&lru_list_lock);
2400         return 1;
2401 
2402 busy_buffer_page:
2403         /* Uhhuh, start writeback so that we don't end up with all dirty pages */
2404         spin_unlock(&free_list[index].lock);
2405         write_unlock(&hash_table_lock);
2406         spin_unlock(&lru_list_lock);
2407         if (wait) {
2408                 sync_page_buffers(bh, wait);
2409                 /* We waited synchronously, so we can free the buffers. */
2410                 if (wait > 1 && !loop) {
2411                         loop = 1;
2412                         goto cleaned_buffers_try_again;
2413                 }
2414         }
2415         return 0;
2416 }
2417 
2418 /* ================== Debugging =================== */
2419 
2420 void show_buffers(void)
2421 {
2422 #ifdef CONFIG_SMP
2423         struct buffer_head * bh;
2424         int found = 0, locked = 0, dirty = 0, used = 0, lastused = 0;
2425         int protected = 0;
2426         int nlist;
2427         static char *buf_types[NR_LIST] = { "CLEAN", "LOCKED", "DIRTY", "PROTECTED", };
2428 #endif
2429 
2430         printk("Buffer memory:   %6dkB\n",
2431                         atomic_read(&buffermem_pages) << (PAGE_SHIFT-10));
2432 
2433 #ifdef CONFIG_SMP /* trylock does nothing on UP and so we could deadlock */
2434         if (!spin_trylock(&lru_list_lock))
2435                 return;
2436         for(nlist = 0; nlist < NR_LIST; nlist++) {
2437                 found = locked = dirty = used = lastused = protected = 0;
2438                 bh = lru_list[nlist];
2439                 if(!bh) continue;
2440 
2441                 do {
2442                         found++;
2443                         if (buffer_locked(bh))
2444                                 locked++;
2445                         if (buffer_protected(bh))
2446                                 protected++;
2447                         if (buffer_dirty(bh))
2448                                 dirty++;
2449                         if (atomic_read(&bh->b_count))
2450                                 used++, lastused = found;
2451                         bh = bh->b_next_free;
2452                 } while (bh != lru_list[nlist]);
2453                 {
2454                         int tmp = nr_buffers_type[nlist];
2455                         if (found != tmp)
2456                                 printk("%9s: BUG -> found %d, reported %d\n",
2457                                        buf_types[nlist], found, tmp);
2458                 }
2459                 printk("%9s: %d buffers, %lu kbyte, %d used (last=%d), "
2460                        "%d locked, %d protected, %d dirty\n",
2461                        buf_types[nlist], found, size_buffers_type[nlist]>>10,
2462                        used, lastused, locked, protected, dirty);
2463         }
2464         spin_unlock(&lru_list_lock);
2465 #endif
2466 }
2467 
2468 /* ===================== Init ======================= */
2469 
2470 /*
2471  * allocate the hash table and init the free list
2472  * Use gfp() for the hash table to decrease TLB misses, use
2473  * SLAB cache for buffer heads.
2474  */
2475 void __init buffer_init(unsigned long mempages)
2476 {
2477         int order, i;
2478         unsigned int nr_hash;
2479 
2480         /* The buffer cache hash table is less important these days,
2481          * trim it a bit.
2482          */
2483         mempages >>= 14;
2484 
2485         mempages *= sizeof(struct buffer_head *);
2486 
2487         for (order = 0; (1 << order) < mempages; order++)
2488                 ;
2489 
2490         /* try to allocate something until we get it or we're asking
2491            for something that is really too small */
2492 
2493         do {
2494                 unsigned long tmp;
2495 
2496                 nr_hash = (PAGE_SIZE << order) / sizeof(struct buffer_head *);
2497                 bh_hash_mask = (nr_hash - 1);
2498 
2499                 tmp = nr_hash;
2500                 bh_hash_shift = 0;
2501                 while((tmp >>= 1UL) != 0UL)
2502                         bh_hash_shift++;
2503 
2504                 hash_table = (struct buffer_head **)
2505                     __get_free_pages(GFP_ATOMIC, order);
2506         } while (hash_table == NULL && --order > 0);
2507         printk("Buffer-cache hash table entries: %d (order: %d, %ld bytes)\n",
2508                nr_hash, order, (PAGE_SIZE << order));
2509 
2510         if (!hash_table)
2511                 panic("Failed to allocate buffer hash table\n");
2512 
2513         /* Setup hash chains. */
2514         for(i = 0; i < nr_hash; i++)
2515                 hash_table[i] = NULL;
2516 
2517         /* Setup free lists. */
2518         for(i = 0; i < NR_SIZES; i++) {
2519                 free_list[i].list = NULL;
2520                 free_list[i].lock = SPIN_LOCK_UNLOCKED;
2521         }
2522 
2523         /* Setup lru lists. */
2524         for(i = 0; i < NR_LIST; i++)
2525                 lru_list[i] = NULL;
2526 
2527 }
2528 
2529 
2530 /* ====================== bdflush support =================== */
2531 
2532 /* This is a simple kernel daemon, whose job it is to provide a dynamic
2533  * response to dirty buffers.  Once this process is activated, we write back
2534  * a limited number of buffers to the disks and then go back to sleep again.
2535  */
2536 
2537 /* This is the _only_ function that deals with flushing async writes
2538    to disk.
2539    NOTENOTENOTENOTE: we _only_ need to browse the DIRTY lru list
2540    as all dirty buffers lives _only_ in the DIRTY lru list.
2541    As we never browse the LOCKED and CLEAN lru lists they are infact
2542    completly useless. */
2543 static int flush_dirty_buffers(int check_flushtime)
2544 {
2545         struct buffer_head * bh, *next;
2546         int flushed = 0, i;
2547 
2548  restart:
2549         spin_lock(&lru_list_lock);
2550         bh = lru_list[BUF_DIRTY];
2551         if (!bh)
2552                 goto out_unlock;
2553         for (i = nr_buffers_type[BUF_DIRTY]; i-- > 0; bh = next) {
2554                 next = bh->b_next_free;
2555 
2556                 if (!buffer_dirty(bh)) {
2557                         __refile_buffer(bh);
2558                         continue;
2559                 }
2560                 if (buffer_locked(bh))
2561                         continue;
2562 
2563                 if (check_flushtime) {
2564                         /* The dirty lru list is chronologically ordered so
2565                            if the current bh is not yet timed out,
2566                            then also all the following bhs
2567                            will be too young. */
2568                         if (time_before(jiffies, bh->b_flushtime))
2569                                 goto out_unlock;
2570                 } else {
2571                         if (++flushed > bdf_prm.b_un.ndirty)
2572                                 goto out_unlock;
2573                 }
2574 
2575                 /* OK, now we are committed to write it out. */
2576                 atomic_inc(&bh->b_count);
2577                 spin_unlock(&lru_list_lock);
2578                 ll_rw_block(WRITE, 1, &bh);
2579                 atomic_dec(&bh->b_count);
2580 
2581                 if (current->need_resched)
2582                         schedule();
2583                 goto restart;
2584         }
2585  out_unlock:
2586         spin_unlock(&lru_list_lock);
2587 
2588         return flushed;
2589 }
2590 
2591 struct task_struct *bdflush_tsk = 0;
2592 
2593 void wakeup_bdflush(int block)
2594 {
2595         if (current != bdflush_tsk) {
2596                 wake_up_process(bdflush_tsk);
2597 
2598                 if (block)
2599                         flush_dirty_buffers(0);
2600         }
2601 }
2602 
2603 /* 
2604  * Here we attempt to write back old buffers.  We also try to flush inodes 
2605  * and supers as well, since this function is essentially "update", and 
2606  * otherwise there would be no way of ensuring that these quantities ever 
2607  * get written back.  Ideally, we would have a timestamp on the inodes
2608  * and superblocks so that we could write back only the old ones as well
2609  */
2610 
2611 static int sync_old_buffers(void)
2612 {
2613         lock_kernel();
2614         sync_supers(0);
2615         sync_inodes(0);
2616         unlock_kernel();
2617 
2618         flush_dirty_buffers(1);
2619         /* must really sync all the active I/O request to disk here */
2620         run_task_queue(&tq_disk);
2621         return 0;
2622 }
2623 
2624 int block_sync_page(struct page *page)
2625 {
2626         run_task_queue(&tq_disk);
2627         return 0;
2628 }
2629 
2630 /* This is the interface to bdflush.  As we get more sophisticated, we can
2631  * pass tuning parameters to this "process", to adjust how it behaves. 
2632  * We would want to verify each parameter, however, to make sure that it 
2633  * is reasonable. */
2634 
2635 asmlinkage long sys_bdflush(int func, long data)
2636 {
2637         if (!capable(CAP_SYS_ADMIN))
2638                 return -EPERM;
2639 
2640         if (func == 1) {
2641                 /* do_exit directly and let kupdate to do its work alone. */
2642                 do_exit(0);
2643 #if 0 /* left here as it's the only example of lazy-mm-stuff used from
2644          a syscall that doesn't care about the current mm context. */
2645                 int error;
2646                 struct mm_struct *user_mm;
2647 
2648                 /*
2649                  * bdflush will spend all of it's time in kernel-space,
2650                  * without touching user-space, so we can switch it into
2651                  * 'lazy TLB mode' to reduce the cost of context-switches
2652                  * to and from bdflush.
2653                  */
2654                 user_mm = start_lazy_tlb();
2655                 error = sync_old_buffers();
2656                 end_lazy_tlb(user_mm);
2657                 return error;
2658 #endif
2659         }
2660 
2661         /* Basically func 1 means read param 1, 2 means write param 1, etc */
2662         if (func >= 2) {
2663                 int i = (func-2) >> 1;
2664                 if (i >= 0 && i < N_PARAM) {
2665                         if ((func & 1) == 0)
2666                                 return put_user(bdf_prm.data[i], (int*)data);
2667 
2668                         if (data >= bdflush_min[i] && data <= bdflush_max[i]) {
2669                                 bdf_prm.data[i] = data;
2670                                 return 0;
2671                         }
2672                 }
2673                 return -EINVAL;
2674         }
2675 
2676         /* Having func 0 used to launch the actual bdflush and then never
2677          * return (unless explicitly killed). We return zero here to 
2678          * remain semi-compatible with present update(8) programs.
2679          */
2680         return 0;
2681 }
2682 
2683 /*
2684  * This is the actual bdflush daemon itself. It used to be started from
2685  * the syscall above, but now we launch it ourselves internally with
2686  * kernel_thread(...)  directly after the first thread in init/main.c
2687  */
2688 int bdflush(void *sem)
2689 {
2690         struct task_struct *tsk = current;
2691         int flushed;
2692         /*
2693          *      We have a bare-bones task_struct, and really should fill
2694          *      in a few more things so "top" and /proc/2/{exe,root,cwd}
2695          *      display semi-sane things. Not real crucial though...  
2696          */
2697 
2698         tsk->session = 1;
2699         tsk->pgrp = 1;
2700         strcpy(tsk->comm, "bdflush");
2701         bdflush_tsk = tsk;
2702 
2703         /* avoid getting signals */
2704         spin_lock_irq(&tsk->sigmask_lock);
2705         flush_signals(tsk);
2706         sigfillset(&tsk->blocked);
2707         recalc_sigpending(tsk);
2708         spin_unlock_irq(&tsk->sigmask_lock);
2709 
2710         up((struct semaphore *)sem);
2711 
2712         for (;;) {
2713                 CHECK_EMERGENCY_SYNC
2714 
2715                 flushed = flush_dirty_buffers(0);
2716                 if (free_shortage())
2717                         flushed += page_launder(GFP_KERNEL, 0);
2718 
2719                 /*
2720                  * If there are still a lot of dirty buffers around,
2721                  * skip the sleep and flush some more. Otherwise, we
2722                  * go to sleep waiting a wakeup.
2723                  */
2724                 set_current_state(TASK_INTERRUPTIBLE);
2725                 if (!flushed || balance_dirty_state(NODEV) < 0) {
2726                         run_task_queue(&tq_disk);
2727                         schedule();
2728                 }
2729                 /* Remember to mark us as running otherwise
2730                    the next schedule will block. */
2731                 __set_current_state(TASK_RUNNING);
2732         }
2733 }
2734 
2735 /*
2736  * This is the kernel update daemon. It was used to live in userspace
2737  * but since it's need to run safely we want it unkillable by mistake.
2738  * You don't need to change your userspace configuration since
2739  * the userspace `update` will do_exit(0) at the first sys_bdflush().
2740  */
2741 int kupdate(void *sem)
2742 {
2743         struct task_struct * tsk = current;
2744         int interval;
2745 
2746         tsk->session = 1;
2747         tsk->pgrp = 1;
2748         strcpy(tsk->comm, "kupdate");
2749 
2750         /* sigstop and sigcont will stop and wakeup kupdate */
2751         spin_lock_irq(&tsk->sigmask_lock);
2752         sigfillset(&tsk->blocked);
2753         siginitsetinv(&current->blocked, sigmask(SIGCONT) | sigmask(SIGSTOP));
2754         recalc_sigpending(tsk);
2755         spin_unlock_irq(&tsk->sigmask_lock);
2756 
2757         up((struct semaphore *)sem);
2758 
2759         for (;;) {
2760                 /* update interval */
2761                 interval = bdf_prm.b_un.interval;
2762                 if (interval) {
2763                         tsk->state = TASK_INTERRUPTIBLE;
2764                         schedule_timeout(interval);
2765                 } else {
2766                 stop_kupdate:
2767                         tsk->state = TASK_STOPPED;
2768                         schedule(); /* wait for SIGCONT */
2769                 }
2770                 /* check for sigstop */
2771                 if (signal_pending(tsk)) {
2772                         int stopped = 0;
2773                         spin_lock_irq(&tsk->sigmask_lock);
2774                         if (sigismember(&tsk->pending.signal, SIGSTOP)) {
2775                                 sigdelset(&tsk->pending.signal, SIGSTOP);
2776                                 stopped = 1;
2777                         }
2778                         recalc_sigpending(tsk);
2779                         spin_unlock_irq(&tsk->sigmask_lock);
2780                         if (stopped)
2781                                 goto stop_kupdate;
2782                 }
2783 #ifdef DEBUG
2784                 printk("kupdate() activated...\n");
2785 #endif
2786                 sync_old_buffers();
2787         }
2788 }
2789 
2790 static int __init bdflush_init(void)
2791 {
2792         DECLARE_MUTEX_LOCKED(sem);
2793         kernel_thread(bdflush, &sem, CLONE_FS | CLONE_FILES | CLONE_SIGNAL);
2794         down(&sem);
2795         kernel_thread(kupdate, &sem, CLONE_FS | CLONE_FILES | CLONE_SIGNAL);
2796         down(&sem);
2797         return 0;
2798 }
2799 
2800 module_init(bdflush_init)
2801 
2802 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.