1 /*
2 * linux/fs/nfs/read.c
3 *
4 * Block I/O for NFS
5 *
6 * Partial copy of Linus' read cache modifications to fs/nfs/file.c
7 * modified for async RPC by okir@monad.swb.de
8 *
9 * We do an ugly hack here in order to return proper error codes to the
10 * user program when a read request failed: since generic_file_read
11 * only checks the return value of inode->i_op->readpage() which is always 0
12 * for async RPC, we set the error bit of the page to 1 when an error occurs,
13 * and make nfs_readpage transmit requests synchronously when encountering this.
14 * This is only a small problem, though, since we now retry all operations
15 * within the RPC code when root squashing is suspected.
16 */
17
18 #include <linux/config.h>
19 #include <linux/sched.h>
20 #include <linux/kernel.h>
21 #include <linux/errno.h>
22 #include <linux/fcntl.h>
23 #include <linux/stat.h>
24 #include <linux/mm.h>
25 #include <linux/malloc.h>
26 #include <linux/pagemap.h>
27 #include <linux/sunrpc/clnt.h>
28 #include <linux/nfs_fs.h>
29 #include <linux/nfs_page.h>
30 #include <linux/nfs_flushd.h>
31 #include <linux/smp_lock.h>
32
33 #include <asm/system.h>
34
35 #define NFSDBG_FACILITY NFSDBG_PAGECACHE
36
37 struct nfs_read_data {
38 struct rpc_task task;
39 struct inode *inode;
40 struct rpc_cred *cred;
41 struct nfs_readargs args; /* XDR argument struct */
42 struct nfs_readres res; /* ... and result struct */
43 struct nfs_fattr fattr; /* fattr storage */
44 struct list_head pages; /* Coalesced read requests */
45 };
46
47 /*
48 * Local function declarations
49 */
50 static void nfs_readpage_result(struct rpc_task *task);
51
52 /* Hack for future NFS swap support */
53 #ifndef IS_SWAPFILE
54 # define IS_SWAPFILE(inode) (0)
55 #endif
56
57 static kmem_cache_t *nfs_rdata_cachep;
58
59 static __inline__ struct nfs_read_data *nfs_readdata_alloc(void)
60 {
61 struct nfs_read_data *p;
62 p = kmem_cache_alloc(nfs_rdata_cachep, SLAB_NFS);
63 if (p) {
64 memset(p, 0, sizeof(*p));
65 INIT_LIST_HEAD(&p->pages);
66 }
67 return p;
68 }
69
70 static __inline__ void nfs_readdata_free(struct nfs_read_data *p)
71 {
72 kmem_cache_free(nfs_rdata_cachep, p);
73 }
74
75 static void nfs_readdata_release(struct rpc_task *task)
76 {
77 struct nfs_read_data *data = (struct nfs_read_data *)task->tk_calldata;
78 nfs_readdata_free(data);
79 }
80
81 /*
82 * Read a page synchronously.
83 */
84 static int
85 nfs_readpage_sync(struct file *file, struct inode *inode, struct page *page)
86 {
87 struct rpc_cred *cred = NULL;
88 struct nfs_fattr fattr;
89 loff_t offset = page_offset(page);
90 char *buffer;
91 int rsize = NFS_SERVER(inode)->rsize;
92 int result;
93 int count = PAGE_CACHE_SIZE;
94 int flags = IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0;
95 int eof;
96
97 dprintk("NFS: nfs_readpage_sync(%p)\n", page);
98
99 if (file)
100 cred = nfs_file_cred(file);
101
102 /*
103 * This works now because the socket layer never tries to DMA
104 * into this buffer directly.
105 */
106 buffer = kmap(page);
107 do {
108 if (count < rsize)
109 rsize = count;
110
111 dprintk("NFS: nfs_proc_read(%s, (%x/%Ld), %Ld, %d, %p)\n",
112 NFS_SERVER(inode)->hostname,
113 inode->i_dev, (long long)NFS_FILEID(inode),
114 (long long)offset, rsize, buffer);
115
116 lock_kernel();
117 result = NFS_PROTO(inode)->read(inode, cred, &fattr, flags,
118 offset, rsize, buffer, &eof);
119 nfs_refresh_inode(inode, &fattr);
120 unlock_kernel();
121
122 /*
123 * Even if we had a partial success we can't mark the page
124 * cache valid.
125 */
126 if (result < 0) {
127 if (result == -EISDIR)
128 result = -EINVAL;
129 goto io_error;
130 }
131 count -= result;
132 offset += result;
133 buffer += result;
134 if (result < rsize) /* NFSv2ism */
135 break;
136 } while (count);
137
138 memset(buffer, 0, count);
139 flush_dcache_page(page);
140 SetPageUptodate(page);
141 if (PageError(page))
142 ClearPageError(page);
143 result = 0;
144
145 io_error:
146 kunmap(page);
147 UnlockPage(page);
148 return result;
149 }
150
151 static inline struct nfs_page *
152 _nfs_find_read(struct inode *inode, struct page *page)
153 {
154 struct list_head *head, *next;
155
156 head = &inode->u.nfs_i.read;
157 next = head->next;
158 while (next != head) {
159 struct nfs_page *req = nfs_list_entry(next);
160 next = next->next;
161 if (page_index(req->wb_page) != page_index(page))
162 continue;
163 req->wb_count++;
164 return req;
165 }
166 return NULL;
167 }
168
169 static struct nfs_page *
170 nfs_find_read(struct inode *inode, struct page *page)
171 {
172 struct nfs_page *req;
173 spin_lock(&nfs_wreq_lock);
174 req = _nfs_find_read(inode, page);
175 spin_unlock(&nfs_wreq_lock);
176 return req;
177 }
178
179 /*
180 * Add a request to the inode's asynchronous read list.
181 */
182 static inline void
183 nfs_mark_request_read(struct nfs_page *req)
184 {
185 struct inode *inode = req->wb_inode;
186
187 spin_lock(&nfs_wreq_lock);
188 if (list_empty(&req->wb_list)) {
189 nfs_list_add_request(req, &inode->u.nfs_i.read);
190 inode->u.nfs_i.nread++;
191 }
192 spin_unlock(&nfs_wreq_lock);
193 /*
194 * NB: the call to inode_schedule_scan() must lie outside the
195 * spinlock since it can run flushd().
196 */
197 inode_schedule_scan(inode, req->wb_timeout);
198 }
199
200 static int
201 nfs_readpage_async(struct file *file, struct inode *inode, struct page *page)
202 {
203 struct nfs_page *req, *new = NULL;
204 int result;
205
206 for (;;) {
207 result = 0;
208 if (Page_Uptodate(page))
209 break;
210
211 req = nfs_find_read(inode, page);
212 if (req) {
213 if (page != req->wb_page) {
214 nfs_release_request(req);
215 nfs_pagein_inode(inode, page_index(page), 0);
216 continue;
217 }
218 nfs_release_request(req);
219 break;
220 }
221
222 if (new) {
223 nfs_lock_request(new);
224 new->wb_timeout = jiffies + NFS_READ_DELAY;
225 nfs_mark_request_read(new);
226 nfs_unlock_request(new);
227 new = NULL;
228 break;
229 }
230
231 result = -ENOMEM;
232 new = nfs_create_request(file, inode, page, 0, PAGE_CACHE_SIZE);
233 if (!new)
234 break;
235 }
236
237 if (inode->u.nfs_i.nread >= NFS_SERVER(inode)->rpages ||
238 page_index(page) == (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT)
239 nfs_pagein_inode(inode, 0, 0);
240 if (new)
241 nfs_release_request(new);
242 return result;
243 }
244
245 /*
246 * Set up the NFS read request struct
247 */
248 static void
249 nfs_read_rpcsetup(struct list_head *head, struct nfs_read_data *data)
250 {
251 struct nfs_page *req;
252 struct iovec *iov;
253 unsigned int count;
254
255 iov = data->args.iov;
256 count = 0;
257 while (!list_empty(head)) {
258 struct nfs_page *req = nfs_list_entry(head->next);
259 nfs_list_remove_request(req);
260 nfs_list_add_request(req, &data->pages);
261 iov->iov_base = kmap(req->wb_page) + req->wb_offset;
262 iov->iov_len = req->wb_bytes;
263 count += req->wb_bytes;
264 iov++;
265 data->args.nriov++;
266 }
267 req = nfs_list_entry(data->pages.next);
268 data->inode = req->wb_inode;
269 data->cred = req->wb_cred;
270 data->args.fh = NFS_FH(req->wb_inode);
271 data->args.offset = page_offset(req->wb_page) + req->wb_offset;
272 data->args.count = count;
273 data->res.fattr = &data->fattr;
274 data->res.count = count;
275 data->res.eof = 0;
276 }
277
278 static void
279 nfs_async_read_error(struct list_head *head)
280 {
281 struct nfs_page *req;
282 struct page *page;
283
284 while (!list_empty(head)) {
285 req = nfs_list_entry(head->next);
286 page = req->wb_page;
287 nfs_list_remove_request(req);
288 SetPageError(page);
289 UnlockPage(page);
290 nfs_unlock_request(req);
291 nfs_release_request(req);
292 }
293 }
294
295 static int
296 nfs_pagein_one(struct list_head *head, struct inode *inode)
297 {
298 struct rpc_task *task;
299 struct rpc_clnt *clnt = NFS_CLIENT(inode);
300 struct nfs_read_data *data;
301 struct rpc_message msg;
302 int flags;
303 sigset_t oldset;
304
305 data = nfs_readdata_alloc();
306 if (!data)
307 goto out_bad;
308 task = &data->task;
309
310 /* N.B. Do we need to test? Never called for swapfile inode */
311 flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
312
313 nfs_read_rpcsetup(head, data);
314
315 /* Finalize the task. */
316 rpc_init_task(task, clnt, nfs_readpage_result, flags);
317 task->tk_calldata = data;
318 /* Release requests */
319 task->tk_release = nfs_readdata_release;
320
321 #ifdef CONFIG_NFS_V3
322 msg.rpc_proc = (NFS_PROTO(inode)->version == 3) ? NFS3PROC_READ : NFSPROC_READ;
323 #else
324 msg.rpc_proc = NFSPROC_READ;
325 #endif
326 msg.rpc_argp = &data->args;
327 msg.rpc_resp = &data->res;
328 msg.rpc_cred = data->cred;
329
330 /* Start the async call */
331 dprintk("NFS: %4d initiated read call (req %x/%Ld count %d nriov %d.\n",
332 task->tk_pid,
333 inode->i_dev, (long long)NFS_FILEID(inode),
334 data->args.count, data->args.nriov);
335
336 rpc_clnt_sigmask(clnt, &oldset);
337 rpc_call_setup(task, &msg, 0);
338 rpc_execute(task);
339 rpc_clnt_sigunmask(clnt, &oldset);
340 return 0;
341 out_bad:
342 nfs_async_read_error(head);
343 return -ENOMEM;
344 }
345
346 static int
347 nfs_pagein_list(struct inode *inode, struct list_head *head)
348 {
349 LIST_HEAD(one_request);
350 struct nfs_page *req;
351 int error = 0;
352 unsigned int pages = 0,
353 rpages = NFS_SERVER(inode)->rpages;
354
355 while (!list_empty(head)) {
356 pages += nfs_coalesce_requests(head, &one_request, rpages);
357 req = nfs_list_entry(one_request.next);
358 error = nfs_pagein_one(&one_request, req->wb_inode);
359 if (error < 0)
360 break;
361 }
362 if (error >= 0)
363 return pages;
364
365 nfs_async_read_error(head);
366 return error;
367 }
368
369 static int
370 nfs_scan_read_timeout(struct inode *inode, struct list_head *dst)
371 {
372 int pages;
373 spin_lock(&nfs_wreq_lock);
374 pages = nfs_scan_list_timeout(&inode->u.nfs_i.read, dst, inode);
375 inode->u.nfs_i.nread -= pages;
376 if ((inode->u.nfs_i.nread == 0) != list_empty(&inode->u.nfs_i.read))
377 printk(KERN_ERR "NFS: desynchronized value of nfs_i.nread.\n");
378 spin_unlock(&nfs_wreq_lock);
379 return pages;
380 }
381
382 static int
383 nfs_scan_read(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages)
384 {
385 int res;
386 spin_lock(&nfs_wreq_lock);
387 res = nfs_scan_list(&inode->u.nfs_i.read, dst, NULL, idx_start, npages);
388 inode->u.nfs_i.nread -= res;
389 if ((inode->u.nfs_i.nread == 0) != list_empty(&inode->u.nfs_i.read))
390 printk(KERN_ERR "NFS: desynchronized value of nfs_i.nread.\n");
391 spin_unlock(&nfs_wreq_lock);
392 return res;
393 }
394
395 int nfs_pagein_inode(struct inode *inode, unsigned long idx_start,
396 unsigned int npages)
397 {
398 LIST_HEAD(head);
399 int res,
400 error = 0;
401
402 res = nfs_scan_read(inode, &head, idx_start, npages);
403 if (res)
404 error = nfs_pagein_list(inode, &head);
405 if (error < 0)
406 return error;
407 return res;
408 }
409
410 int nfs_pagein_timeout(struct inode *inode)
411 {
412 LIST_HEAD(head);
413 int pages,
414 error = 0;
415
416 pages = nfs_scan_read_timeout(inode, &head);
417 if (pages)
418 error = nfs_pagein_list(inode, &head);
419 if (error < 0)
420 return error;
421 return pages;
422 }
423
424 /*
425 * This is the callback from RPC telling us whether a reply was
426 * received or some error occurred (timeout or socket shutdown).
427 */
428 static void
429 nfs_readpage_result(struct rpc_task *task)
430 {
431 struct nfs_read_data *data = (struct nfs_read_data *) task->tk_calldata;
432 struct inode *inode = data->inode;
433 int count = data->res.count;
434
435 dprintk("NFS: %4d nfs_readpage_result, (status %d)\n",
436 task->tk_pid, task->tk_status);
437
438 nfs_refresh_inode(inode, &data->fattr);
439 while (!list_empty(&data->pages)) {
440 struct nfs_page *req = nfs_list_entry(data->pages.next);
441 struct page *page = req->wb_page;
442 nfs_list_remove_request(req);
443
444 if (task->tk_status >= 0 && count >= 0) {
445 SetPageUptodate(page);
446 count -= PAGE_CACHE_SIZE;
447 } else
448 SetPageError(page);
449 flush_dcache_page(page);
450 kunmap(page);
451 UnlockPage(page);
452
453 dprintk("NFS: read (%x/%Ld %d@%Ld)\n",
454 req->wb_inode->i_dev,
455 (long long)NFS_FILEID(req->wb_inode),
456 req->wb_bytes,
457 (long long)(page_offset(page) + req->wb_offset));
458 nfs_unlock_request(req);
459 nfs_release_request(req);
460 }
461 }
462
463 /*
464 * Read a page over NFS.
465 * We read the page synchronously in the following cases:
466 * - The NFS rsize is smaller than PAGE_CACHE_SIZE. We could kludge our way
467 * around this by creating several consecutive read requests, but
468 * that's hardly worth it.
469 * - The error flag is set for this page. This happens only when a
470 * previous async read operation failed.
471 */
472 int
473 nfs_readpage(struct file *file, struct page *page)
474 {
475 struct inode *inode;
476 int error;
477
478 if (!file) {
479 struct address_space *mapping = page->mapping;
480 if (!mapping)
481 BUG();
482 inode = mapping->host;
483 } else
484 inode = file->f_dentry->d_inode;
485 if (!inode)
486 BUG();
487
488 dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
489 page, PAGE_CACHE_SIZE, page->index);
490 /*
491 * Try to flush any pending writes to the file..
492 *
493 * NOTE! Because we own the page lock, there cannot
494 * be any new pending writes generated at this point
495 * for this page (other pages can be written to).
496 */
497 error = nfs_wb_page(inode, page);
498 if (error)
499 goto out_error;
500
501 error = -1;
502 if (!PageError(page) && NFS_SERVER(inode)->rsize >= PAGE_CACHE_SIZE)
503 error = nfs_readpage_async(file, inode, page);
504 if (error >= 0)
505 goto out;
506
507 error = nfs_readpage_sync(file, inode, page);
508 if (error < 0 && IS_SWAPFILE(inode))
509 printk("Aiee.. nfs swap-in of page failed!\n");
510 out:
511 return error;
512
513 out_error:
514 UnlockPage(page);
515 goto out;
516 }
517
518 int nfs_init_readpagecache(void)
519 {
520 nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
521 sizeof(struct nfs_read_data),
522 0, SLAB_HWCACHE_ALIGN,
523 NULL, NULL);
524 if (nfs_rdata_cachep == NULL)
525 return -ENOMEM;
526
527 return 0;
528 }
529
530 void nfs_destroy_readpagecache(void)
531 {
532 if (kmem_cache_destroy(nfs_rdata_cachep))
533 printk(KERN_INFO "nfs_read_data: not all structures were freed\n");
534 }
535
This page was automatically generated by the
LXR engine.
Visit the LXR main site for more
information.