~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

Linux Cross Reference
Linux/fs/select.c

Version: ~ [ 2.4.0 ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /*
  2  * This file contains the procedures for the handling of select and poll
  3  *
  4  * Created for Linux based loosely upon Mathius Lattner's minix
  5  * patches by Peter MacDonald. Heavily edited by Linus.
  6  *
  7  *  4 February 1994
  8  *     COFF/ELF binary emulation. If the process has the STICKY_TIMEOUTS
  9  *     flag set in its personality we do *not* modify the given timeout
 10  *     parameter to reflect time remaining.
 11  *
 12  *  24 January 2000
 13  *     Changed sys_poll()/do_poll() to use PAGE_SIZE chunk-based allocation 
 14  *     of fds to overcome nfds < 16390 descriptors limit (Tigran Aivazian).
 15  */
 16 
 17 #include <linux/malloc.h>
 18 #include <linux/smp_lock.h>
 19 #include <linux/poll.h>
 20 #include <linux/file.h>
 21 
 22 #include <asm/uaccess.h>
 23 
 24 #define ROUND_UP(x,y) (((x)+(y)-1)/(y))
 25 #define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)
 26 
 27 struct poll_table_entry {
 28         struct file * filp;
 29         wait_queue_t wait;
 30         wait_queue_head_t * wait_address;
 31 };
 32 
 33 struct poll_table_page {
 34         struct poll_table_page * next;
 35         struct poll_table_entry * entry;
 36         struct poll_table_entry entries[0];
 37 };
 38 
 39 #define POLL_TABLE_FULL(table) \
 40         ((unsigned long)((table)->entry+1) > PAGE_SIZE + (unsigned long)(table))
 41 
 42 /*
 43  * Ok, Peter made a complicated, but straightforward multiple_wait() function.
 44  * I have rewritten this, taking some shortcuts: This code may not be easy to
 45  * follow, but it should be free of race-conditions, and it's practical. If you
 46  * understand what I'm doing here, then you understand how the linux
 47  * sleep/wakeup mechanism works.
 48  *
 49  * Two very simple procedures, poll_wait() and poll_freewait(), do all the
 50  * work.  poll_wait() is an inline function defined in <linux/poll.h>,
 51  * as all select/poll functions have to call it to add an entry to the
 52  * poll table.
 53  */
 54 
 55 void poll_freewait(poll_table* pt)
 56 {
 57         struct poll_table_page * p = pt->table;
 58         while (p) {
 59                 struct poll_table_entry * entry;
 60                 struct poll_table_page *old;
 61 
 62                 entry = p->entry;
 63                 do {
 64                         entry--;
 65                         remove_wait_queue(entry->wait_address,&entry->wait);
 66                         fput(entry->filp);
 67                 } while (entry > p->entries);
 68                 old = p;
 69                 p = p->next;
 70                 free_page((unsigned long) old);
 71         }
 72 }
 73 
 74 void __pollwait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p)
 75 {
 76         struct poll_table_page *table = p->table;
 77 
 78         if (!table || POLL_TABLE_FULL(table)) {
 79                 struct poll_table_page *new_table;
 80 
 81                 new_table = (struct poll_table_page *) __get_free_page(GFP_KERNEL);
 82                 if (!new_table) {
 83                         p->error = -ENOMEM;
 84                         __set_current_state(TASK_RUNNING);
 85                         return;
 86                 }
 87                 new_table->entry = new_table->entries;
 88                 new_table->next = table;
 89                 p->table = new_table;
 90                 table = new_table;
 91         }
 92 
 93         /* Add a new entry */
 94         {
 95                 struct poll_table_entry * entry = table->entry;
 96                 table->entry = entry+1;
 97                 get_file(filp);
 98                 entry->filp = filp;
 99                 entry->wait_address = wait_address;
100                 init_waitqueue_entry(&entry->wait, current);
101                 add_wait_queue(wait_address,&entry->wait);
102         }
103 }
104 
/*
 * Accessors for word n of each bitmap reachable through fds, a pointer
 * to a structure with in/out/ex/res_in/res_out/res_ex members.
 *
 * Both parameters are parenthesized so that arguments which are
 * themselves expressions (e.g. "&set" or "a ? b : c") expand correctly.
 */
#define __IN(fds, n)		((fds)->in + (n))
#define __OUT(fds, n)		((fds)->out + (n))
#define __EX(fds, n)		((fds)->ex + (n))
#define __RES_IN(fds, n)	((fds)->res_in + (n))
#define __RES_OUT(fds, n)	((fds)->res_out + (n))
#define __RES_EX(fds, n)	((fds)->res_ex + (n))

/* Union of the three request bitmaps (in, out, ex) at word n. */
#define BITS(fds, n)		(*__IN(fds, n)|*__OUT(fds, n)|*__EX(fds, n))
113 
114 static int max_select_fd(unsigned long n, fd_set_bits *fds)
115 {
116         unsigned long *open_fds;
117         unsigned long set;
118         int max;
119 
120         /* handle last in-complete long-word first */
121         set = ~(~0UL << (n & (__NFDBITS-1)));
122         n /= __NFDBITS;
123         open_fds = current->files->open_fds->fds_bits+n;
124         max = 0;
125         if (set) {
126                 set &= BITS(fds, n);
127                 if (set) {
128                         if (!(set & ~*open_fds))
129                                 goto get_max;
130                         return -EBADF;
131                 }
132         }
133         while (n) {
134                 open_fds--;
135                 n--;
136                 set = BITS(fds, n);
137                 if (!set)
138                         continue;
139                 if (set & ~*open_fds)
140                         return -EBADF;
141                 if (max)
142                         continue;
143 get_max:
144                 do {
145                         max++;
146                         set >>= 1;
147                 } while (set);
148                 max += n * __NFDBITS;
149         }
150 
151         return max;
152 }
153 
/* Single-bit mask for descriptor i within its bitmap word. */
#define BIT(i)		(1UL << ((i) & (__NFDBITS - 1)))
/* Address of the word of bitmap m that holds descriptor i. */
#define MEM(i,m)	((m) + (unsigned)(i) / __NFDBITS)
/* Test, respectively set, the mask i in the word that m points at. */
#define ISSET(i,m)	((*(m) & (i)) != 0)
#define SET(i,m)	(*(m) |= (i))

/* poll() events that make a descriptor count as readable/writable/exceptional. */
#define POLLIN_SET	(POLLIN | POLLRDNORM | POLLRDBAND | POLLHUP | POLLERR)
#define POLLOUT_SET	(POLLOUT | POLLWRNORM | POLLWRBAND | POLLERR)
#define POLLEX_SET	(POLLPRI)
162 
163 int do_select(int n, fd_set_bits *fds, long *timeout)
164 {
165         poll_table table, *wait;
166         int retval, i, off;
167         long __timeout = *timeout;
168 
169         read_lock(&current->files->file_lock);
170         retval = max_select_fd(n, fds);
171         read_unlock(&current->files->file_lock);
172 
173         if (retval < 0)
174                 return retval;
175         n = retval;
176 
177         poll_initwait(&table);
178         wait = &table;
179         if (!__timeout)
180                 wait = NULL;
181         retval = 0;
182         for (;;) {
183                 set_current_state(TASK_INTERRUPTIBLE);
184                 for (i = 0 ; i < n; i++) {
185                         unsigned long bit = BIT(i);
186                         unsigned long mask;
187                         struct file *file;
188 
189                         off = i / __NFDBITS;
190                         if (!(bit & BITS(fds, off)))
191                                 continue;
192                         file = fget(i);
193                         mask = POLLNVAL;
194                         if (file) {
195                                 mask = DEFAULT_POLLMASK;
196                                 if (file->f_op && file->f_op->poll)
197                                         mask = file->f_op->poll(file, wait);
198                                 fput(file);
199                         }
200                         if ((mask & POLLIN_SET) && ISSET(bit, __IN(fds,off))) {
201                                 SET(bit, __RES_IN(fds,off));
202                                 retval++;
203                                 wait = NULL;
204                         }
205                         if ((mask & POLLOUT_SET) && ISSET(bit, __OUT(fds,off))) {
206                                 SET(bit, __RES_OUT(fds,off));
207                                 retval++;
208                                 wait = NULL;
209                         }
210                         if ((mask & POLLEX_SET) && ISSET(bit, __EX(fds,off))) {
211                                 SET(bit, __RES_EX(fds,off));
212                                 retval++;
213                                 wait = NULL;
214                         }
215                 }
216                 wait = NULL;
217                 if (retval || !__timeout || signal_pending(current))
218                         break;
219                 if(table.error) {
220                         retval = table.error;
221                         break;
222                 }
223                 __timeout = schedule_timeout(__timeout);
224         }
225         current->state = TASK_RUNNING;
226 
227         poll_freewait(&table);
228 
229         /*
230          * Up-to-date the caller timeout.
231          */
232         *timeout = __timeout;
233         return retval;
234 }
235 
236 static void *select_bits_alloc(int size)
237 {
238         return kmalloc(6 * size, GFP_KERNEL);
239 }
240 
/*
 * Release a block obtained from select_bits_alloc().  The size argument
 * is accepted but not used here.
 */
static void select_bits_free(void *bits, int size)
{
	kfree(bits);
}
245 
246 /*
247  * We can actually return ERESTARTSYS instead of EINTR, but I'd
248  * like to be certain this leads to no problems. So I return
249  * EINTR just for safety.
250  *
251  * Update: ERESTARTSYS breaks at least the xview clock binary, so
252  * I'm trying ERESTARTNOHAND which restart only when you want to.
253  */
/* Second counts below this limit are converted to a timeout by sys_select(). */
#define MAX_SELECT_SECONDS	((unsigned long)(MAX_SCHEDULE_TIMEOUT / HZ) - 1)
256 
257 asmlinkage long
258 sys_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct timeval *tvp)
259 {
260         fd_set_bits fds;
261         char *bits;
262         long timeout;
263         int ret, size;
264 
265         timeout = MAX_SCHEDULE_TIMEOUT;
266         if (tvp) {
267                 time_t sec, usec;
268 
269                 if ((ret = verify_area(VERIFY_READ, tvp, sizeof(*tvp)))
270                     || (ret = __get_user(sec, &tvp->tv_sec))
271                     || (ret = __get_user(usec, &tvp->tv_usec)))
272                         goto out_nofds;
273 
274                 ret = -EINVAL;
275                 if (sec < 0 || usec < 0)
276                         goto out_nofds;
277 
278                 if ((unsigned long) sec < MAX_SELECT_SECONDS) {
279                         timeout = ROUND_UP(usec, 1000000/HZ);
280                         timeout += sec * (unsigned long) HZ;
281                 }
282         }
283 
284         ret = -EINVAL;
285         if (n < 0)
286                 goto out_nofds;
287 
288         if (n > current->files->max_fdset)
289                 n = current->files->max_fdset;
290 
291         /*
292          * We need 6 bitmaps (in/out/ex for both incoming and outgoing),
293          * since we used fdset we need to allocate memory in units of
294          * long-words. 
295          */
296         ret = -ENOMEM;
297         size = FDS_BYTES(n);
298         bits = select_bits_alloc(size);
299         if (!bits)
300                 goto out_nofds;
301         fds.in      = (unsigned long *)  bits;
302         fds.out     = (unsigned long *) (bits +   size);
303         fds.ex      = (unsigned long *) (bits + 2*size);
304         fds.res_in  = (unsigned long *) (bits + 3*size);
305         fds.res_out = (unsigned long *) (bits + 4*size);
306         fds.res_ex  = (unsigned long *) (bits + 5*size);
307 
308         if ((ret = get_fd_set(n, inp, fds.in)) ||
309             (ret = get_fd_set(n, outp, fds.out)) ||
310             (ret = get_fd_set(n, exp, fds.ex)))
311                 goto out;
312         zero_fd_set(n, fds.res_in);
313         zero_fd_set(n, fds.res_out);
314         zero_fd_set(n, fds.res_ex);
315 
316         ret = do_select(n, &fds, &timeout);
317 
318         if (tvp && !(current->personality & STICKY_TIMEOUTS)) {
319                 time_t sec = 0, usec = 0;
320                 if (timeout) {
321                         sec = timeout / HZ;
322                         usec = timeout % HZ;
323                         usec *= (1000000/HZ);
324                 }
325                 put_user(sec, &tvp->tv_sec);
326                 put_user(usec, &tvp->tv_usec);
327         }
328 
329         if (ret < 0)
330                 goto out;
331         if (!ret) {
332                 ret = -ERESTARTNOHAND;
333                 if (signal_pending(current))
334                         goto out;
335                 ret = 0;
336         }
337 
338         set_fd_set(n, inp, fds.res_in);
339         set_fd_set(n, outp, fds.res_out);
340         set_fd_set(n, exp, fds.res_ex);
341 
342 out:
343         select_bits_free(bits, size);
344 out_nofds:
345         return ret;
346 }
347 
/* Number of whole struct pollfd records that fit in one page. */
#define POLLFD_PER_PAGE		(PAGE_SIZE / sizeof(struct pollfd))
349 
350 static void do_pollfd(unsigned int num, struct pollfd * fdpage,
351         poll_table ** pwait, int *count)
352 {
353         int i;
354 
355         for (i = 0; i < num; i++) {
356                 int fd;
357                 unsigned int mask;
358                 struct pollfd *fdp;
359 
360                 mask = 0;
361                 fdp = fdpage+i;
362                 fd = fdp->fd;
363                 if (fd >= 0) {
364                         struct file * file = fget(fd);
365                         mask = POLLNVAL;
366                         if (file != NULL) {
367                                 mask = DEFAULT_POLLMASK;
368                                 if (file->f_op && file->f_op->poll)
369                                         mask = file->f_op->poll(file, *pwait);
370                                 mask &= fdp->events | POLLERR | POLLHUP;
371                                 fput(file);
372                         }
373                         if (mask) {
374                                 *pwait = NULL;
375                                 (*count)++;
376                         }
377                 }
378                 fdp->revents = mask;
379         }
380 }
381 
382 static int do_poll(unsigned int nfds, unsigned int nchunks, unsigned int nleft, 
383         struct pollfd *fds[], poll_table *wait, long timeout)
384 {
385         int count;
386         poll_table* pt = wait;
387 
388         for (;;) {
389                 unsigned int i;
390 
391                 set_current_state(TASK_INTERRUPTIBLE);
392                 count = 0;
393                 for (i=0; i < nchunks; i++)
394                         do_pollfd(POLLFD_PER_PAGE, fds[i], &pt, &count);
395                 if (nleft)
396                         do_pollfd(nleft, fds[nchunks], &pt, &count);
397                 pt = NULL;
398                 if (count || !timeout || signal_pending(current))
399                         break;
400                 count = wait->error;
401                 if (count)
402                         break;
403                 timeout = schedule_timeout(timeout);
404         }
405         current->state = TASK_RUNNING;
406         return count;
407 }
408 
409 asmlinkage long sys_poll(struct pollfd * ufds, unsigned int nfds, long timeout)
410 {
411         int i, j, fdcount, err;
412         struct pollfd **fds;
413         poll_table table, *wait;
414         int nchunks, nleft;
415 
416         /* Do a sanity check on nfds ... */
417         if (nfds > current->files->max_fds)
418                 return -EINVAL;
419 
420         if (timeout) {
421                 /* Careful about overflow in the intermediate values */
422                 if ((unsigned long) timeout < MAX_SCHEDULE_TIMEOUT / HZ)
423                         timeout = (unsigned long)(timeout*HZ+999)/1000+1;
424                 else /* Negative or overflow */
425                         timeout = MAX_SCHEDULE_TIMEOUT;
426         }
427 
428         poll_initwait(&table);
429         wait = &table;
430         if (!timeout)
431                 wait = NULL;
432 
433         err = -ENOMEM;
434         fds = NULL;
435         if (nfds != 0) {
436                 fds = (struct pollfd **)kmalloc(
437                         (1 + (nfds - 1) / POLLFD_PER_PAGE) * sizeof(struct pollfd *),
438                         GFP_KERNEL);
439                 if (fds == NULL)
440                         goto out;
441         }
442 
443         nchunks = 0;
444         nleft = nfds;
445         while (nleft > POLLFD_PER_PAGE) { /* allocate complete PAGE_SIZE chunks */
446                 fds[nchunks] = (struct pollfd *)__get_free_page(GFP_KERNEL);
447                 if (fds[nchunks] == NULL)
448                         goto out_fds;
449                 nchunks++;
450                 nleft -= POLLFD_PER_PAGE;
451         }
452         if (nleft) { /* allocate last PAGE_SIZE chunk, only nleft elements used */
453                 fds[nchunks] = (struct pollfd *)__get_free_page(GFP_KERNEL);
454                 if (fds[nchunks] == NULL)
455                         goto out_fds;
456         }
457 
458         err = -EFAULT;
459         for (i=0; i < nchunks; i++)
460                 if (copy_from_user(fds[i], ufds + i*POLLFD_PER_PAGE, PAGE_SIZE))
461                         goto out_fds1;
462         if (nleft) {
463                 if (copy_from_user(fds[nchunks], ufds + nchunks*POLLFD_PER_PAGE, 
464                                 nleft * sizeof(struct pollfd)))
465                         goto out_fds1;
466         }
467 
468         fdcount = do_poll(nfds, nchunks, nleft, fds, wait, timeout);
469 
470         /* OK, now copy the revents fields back to user space. */
471         for(i=0; i < nchunks; i++)
472                 for (j=0; j < POLLFD_PER_PAGE; j++, ufds++)
473                         __put_user((fds[i] + j)->revents, &ufds->revents);
474         if (nleft)
475                 for (j=0; j < nleft; j++, ufds++)
476                         __put_user((fds[nchunks] + j)->revents, &ufds->revents);
477 
478         err = fdcount;
479         if (!fdcount && signal_pending(current))
480                 err = -EINTR;
481 
482 out_fds1:
483         if (nleft)
484                 free_page((unsigned long)(fds[nchunks]));
485 out_fds:
486         for (i=0; i < nchunks; i++)
487                 free_page((unsigned long)(fds[i]));
488         if (nfds != 0)
489                 kfree(fds);
490 out:
491         poll_freewait(&table);
492         return err;
493 }
494 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.