~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

Linux Cross Reference
Linux/net/socket.c

Version: ~ [ 2.4.0 ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /*
  2  * NET          An implementation of the SOCKET network access protocol.
  3  *
  4  * Version:     @(#)socket.c    1.1.93  18/02/95
  5  *
  6  * Authors:     Orest Zborowski, <obz@Kodak.COM>
  7  *              Ross Biro, <bir7@leland.Stanford.Edu>
  8  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  9  *
 10  * Fixes:
 11  *              Anonymous       :       NOTSOCK/BADF cleanup. Error fix in
 12  *                                      shutdown()
 13  *              Alan Cox        :       verify_area() fixes
 14  *              Alan Cox        :       Removed DDI
 15  *              Jonathan Kamens :       SOCK_DGRAM reconnect bug
 16  *              Alan Cox        :       Moved a load of checks to the very
 17  *                                      top level.
 18  *              Alan Cox        :       Move address structures to/from user
 19  *                                      mode above the protocol layers.
 20  *              Rob Janssen     :       Allow 0 length sends.
 21  *              Alan Cox        :       Asynchronous I/O support (cribbed from the
 22  *                                      tty drivers).
 23  *              Niibe Yutaka    :       Asynchronous I/O for writes (4.4BSD style)
 24  *              Jeff Uphoff     :       Made max number of sockets command-line
 25  *                                      configurable.
 26  *              Matti Aarnio    :       Made the number of sockets dynamic,
 27  *                                      to be allocated when needed, and mr.
 28  *                                      Uphoff's max is used as max to be
 29  *                                      allowed to allocate.
 30  *              Linus           :       Argh. removed all the socket allocation
 31  *                                      altogether: it's in the inode now.
 32  *              Alan Cox        :       Made sock_alloc()/sock_release() public
 33  *                                      for NetROM and future kernel nfsd type
 34  *                                      stuff.
 35  *              Alan Cox        :       sendmsg/recvmsg basics.
 36  *              Tom Dyas        :       Export net symbols.
 37  *              Marcin Dalecki  :       Fixed problems with CONFIG_NET="n".
 38  *              Alan Cox        :       Added thread locking to sys_* calls
 39  *                                      for sockets. May have errors at the
 40  *                                      moment.
 41  *              Kevin Buhr      :       Fixed the dumb errors in the above.
 42  *              Andi Kleen      :       Some small cleanups, optimizations,
 43  *                                      and fixed a copy_from_user() bug.
 44  *              Tigran Aivazian :       sys_send(args) calls sys_sendto(args, NULL, 0)
 45  *              Tigran Aivazian :       Made listen(2) backlog sanity checks 
 46  *                                      protocol-independent
 47  *
 48  *
 49  *              This program is free software; you can redistribute it and/or
 50  *              modify it under the terms of the GNU General Public License
 51  *              as published by the Free Software Foundation; either version
 52  *              2 of the License, or (at your option) any later version.
 53  *
 54  *
 55  *      This module is effectively the top level interface to the BSD socket
 56  *      paradigm. 
 57  *
 58  */
 59 
 60 #include <linux/config.h>
 61 #include <linux/mm.h>
 62 #include <linux/smp_lock.h>
 63 #include <linux/socket.h>
 64 #include <linux/file.h>
 65 #include <linux/net.h>
 66 #include <linux/interrupt.h>
 67 #include <linux/netdevice.h>
 68 #include <linux/proc_fs.h>
 69 #include <linux/wanrouter.h>
 70 #include <linux/init.h>
 71 #include <linux/poll.h>
 72 #include <linux/cache.h>
 73 #include <linux/module.h>
 74 
 75 #if defined(CONFIG_KMOD) && defined(CONFIG_NET)
 76 #include <linux/kmod.h>
 77 #endif
 78 
 79 #include <asm/uaccess.h>
 80 
 81 #include <linux/inet.h>
 82 #include <net/ip.h>
 83 #include <net/sock.h>
 84 #include <net/tcp.h>
 85 #include <net/udp.h>
 86 #include <net/scm.h>
 87 #include <linux/netfilter.h>
 88 
 89 static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
 90 static loff_t sock_lseek(struct file *file, loff_t offset, int whence);
 91 static ssize_t sock_read(struct file *file, char *buf,
 92                          size_t size, loff_t *ppos);
 93 static ssize_t sock_write(struct file *file, const char *buf,
 94                           size_t size, loff_t *ppos);
 95 static int sock_mmap(struct file *file, struct vm_area_struct * vma);
 96 
 97 static int sock_close(struct inode *inode, struct file *file);
 98 static unsigned int sock_poll(struct file *file,
 99                               struct poll_table_struct *wait);
100 static int sock_ioctl(struct inode *inode, struct file *file,
101                       unsigned int cmd, unsigned long arg);
102 static int sock_fasync(int fd, struct file *filp, int on);
103 static ssize_t sock_readv(struct file *file, const struct iovec *vector,
104                           unsigned long count, loff_t *ppos);
105 static ssize_t sock_writev(struct file *file, const struct iovec *vector,
106                           unsigned long count, loff_t *ppos);
107 
108 
109 /*
110  *      Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
111  *      in the operation structures but are done directly via the socketcall() multiplexor.
112  */
113 
114 static struct file_operations socket_file_ops = {
115         llseek:         sock_lseek,
116         read:           sock_read,
117         write:          sock_write,
118         poll:           sock_poll,
119         ioctl:          sock_ioctl,
120         mmap:           sock_mmap,
121         open:           sock_no_open,   /* special open code to disallow open via /proc */
122         release:        sock_close,
123         fasync:         sock_fasync,
124         readv:          sock_readv,
125         writev:         sock_writev
126 };
127 
128 /*
129  *      The protocol list. Each protocol is registered in here.
130  */
131 
132 static struct net_proto_family *net_families[NPROTO];
133 
#ifdef CONFIG_SMP
/*
 * Locking for the net_families vector.
 *
 * The strategy is: modifications of the net_families vector are short,
 * do not sleep and are very rare, so read access should be free of any
 * exclusive locks. Readers only bump a counter; a writer holds the
 * spinlock and waits for the reader count to drop to zero.
 */
static atomic_t net_family_lockct = ATOMIC_INIT(0);
static spinlock_t net_family_lock = SPIN_LOCK_UNLOCKED;

/*
 * Take the write side: returns with net_family_lock held and the reader
 * count observed as zero. While readers are present the spinlock is
 * dropped and the CPU is yielded before retrying.
 */
static void net_family_write_lock(void)
{
	for (;;) {
		spin_lock(&net_family_lock);
		if (atomic_read(&net_family_lockct) == 0)
			return;		/* spinlock stays held for the caller */
		spin_unlock(&net_family_lock);

		current->policy |= SCHED_YIELD;
		schedule();
	}
}

/* Drop the write side taken by net_family_write_lock(). */
static __inline__ void net_family_write_unlock(void)
{
	spin_unlock(&net_family_lock);
}

/*
 * Enter a read section: announce ourselves first, then wait until no
 * writer holds the spinlock.
 */
static __inline__ void net_family_read_lock(void)
{
	atomic_inc(&net_family_lockct);
	spin_unlock_wait(&net_family_lock);
}

/* Leave a read section entered with net_family_read_lock(). */
static __inline__ void net_family_read_unlock(void)
{
	atomic_dec(&net_family_lockct);
}

#else
/* Without CONFIG_SMP all four lock operations compile to nothing. */
#define net_family_write_lock() do { } while(0)
#define net_family_write_unlock() do { } while(0)
#define net_family_read_lock() do { } while(0)
#define net_family_read_unlock() do { } while(0)
#endif
178 
179 
180 /*
181  *      Statistics counters of the socket lists
182  */
183 
184 static union {
185         int     counter;
186         char    __pad[SMP_CACHE_BYTES];
187 } sockets_in_use[NR_CPUS] __cacheline_aligned = {{0}};
188 
/*
 *	Support routines. Move socket addresses back and forth across the
 *	kernel/user divide and look after the messy bits.
 */

/*
 * Largest socket address, in bytes, that the helpers below will copy.
 *
 * Address sizes: 108 for Unix domain, 16 for IP, 16 for IPX,
 * 24 for IPv6, about 80 for AX.25.
 *
 * Must be at least one bigger than the AF_UNIX size
 * (see net/unix/af_unix.c:unix_mkname()).
 */
#define MAX_SOCK_ADDR	128
202                                          
203 /**
204  *      move_addr_to_kernel     -       copy a socket address into kernel space
205  *      @uaddr: Address in user space
206  *      @kaddr: Address in kernel space
207  *      @ulen: Length in user space
208  *
209  *      The address is copied into kernel space. If the provided address is
210  *      too long an error code of -EINVAL is returned. If the copy gives
211  *      invalid addresses -EFAULT is returned. On a success 0 is returned.
212  */
213 
214 int move_addr_to_kernel(void *uaddr, int ulen, void *kaddr)
215 {
216         if(ulen<0||ulen>MAX_SOCK_ADDR)
217                 return -EINVAL;
218         if(ulen==0)
219                 return 0;
220         if(copy_from_user(kaddr,uaddr,ulen))
221                 return -EFAULT;
222         return 0;
223 }
224 
225 /**
226  *      move_addr_to_user       -       copy an address to user space
227  *      @kaddr: kernel space address
228  *      @klen: length of address in kernel
229  *      @uaddr: user space address
230  *      @ulen: pointer to user length field
231  *
232  *      The value pointed to by ulen on entry is the buffer length available.
233  *      This is overwritten with the buffer space used. -EINVAL is returned
234  *      if an overlong buffer is specified or a negative buffer size. -EFAULT
235  *      is returned if either the buffer or the length field are not
236  *      accessible.
237  *      After copying the data up to the limit the user specifies, the true
238  *      length of the data is written over the length limit the user
239  *      specified. Zero is returned for a success.
240  */
241  
242 int move_addr_to_user(void *kaddr, int klen, void *uaddr, int *ulen)
243 {
244         int err;
245         int len;
246 
247         if((err=get_user(len, ulen)))
248                 return err;
249         if(len>klen)
250                 len=klen;
251         if(len<0 || len> MAX_SOCK_ADDR)
252                 return -EINVAL;
253         if(len)
254         {
255                 if(copy_to_user(uaddr,kaddr,len))
256                         return -EFAULT;
257         }
258         /*
259          *      "fromlen shall refer to the value before truncation.."
260          *                      1003.1g
261          */
262         return __put_user(klen, ulen);
263 }
264 
265 #define SOCKFS_MAGIC 0x534F434B
266 static int sockfs_statfs(struct super_block *sb, struct statfs *buf)
267 {
268         buf->f_type = SOCKFS_MAGIC;
269         buf->f_bsize = 1024;
270         buf->f_namelen = 255;
271         return 0;
272 }
273 
274 static struct super_operations sockfs_ops = {
275         statfs:         sockfs_statfs,
276 };
277 
278 static struct super_block * sockfs_read_super(struct super_block *sb, void *data, int silent)
279 {
280         struct inode *root = new_inode(sb);
281         if (!root)
282                 return NULL;
283         root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR;
284         root->i_uid = root->i_gid = 0;
285         root->i_atime = root->i_mtime = root->i_ctime = CURRENT_TIME;
286         sb->s_blocksize = 1024;
287         sb->s_blocksize_bits = 10;
288         sb->s_magic = SOCKFS_MAGIC;
289         sb->s_op        = &sockfs_ops;
290         sb->s_root = d_alloc(NULL, &(const struct qstr) { "socket:", 7, 0 });
291         if (!sb->s_root) {
292                 iput(root);
293                 return NULL;
294         }
295         sb->s_root->d_sb = sb;
296         sb->s_root->d_parent = sb->s_root;
297         d_instantiate(sb->s_root, root);
298         return sb;
299 }
300 
301 static struct vfsmount *sock_mnt;
302 static DECLARE_FSTYPE(sock_fs_type, "sockfs", sockfs_read_super,
303         FS_NOMOUNT|FS_SINGLE);
/*
 * d_delete hook for socket dentries: unconditionally returns 1.
 * NOTE(review): presumably this tells the dcache not to keep unused
 * socket dentries around - confirm against the dcache documentation.
 */
static int sockfs_delete_dentry(struct dentry *dentry)
{
	const int always_delete = 1;

	return always_delete;
}
308 static struct dentry_operations sockfs_dentry_operations = {
309         d_delete:       sockfs_delete_dentry,
310 };
311 
312 /*
313  *      Obtains the first available file descriptor and sets it up for use.
314  *
315  *      This functions creates file structure and maps it to fd space
316  *      of current process. On success it returns file descriptor
317  *      and file struct implicitly stored in sock->file.
318  *      Note that another thread may close file descriptor before we return
319  *      from this function. We use the fact that now we do not refer
320  *      to socket after mapping. If one day we will need it, this
321  *      function will inincrement ref. count on file by 1.
322  *
323  *      In any case returned fd MAY BE not valid!
324  *      This race condition is inavoidable
325  *      with shared fd spaces, we cannot solve is inside kernel,
326  *      but we take care of internal coherence yet.
327  */
328 
329 static int sock_map_fd(struct socket *sock)
330 {
331         int fd;
332         struct qstr this;
333         char name[32];
334 
335         /*
336          *      Find a file descriptor suitable for return to the user. 
337          */
338 
339         fd = get_unused_fd();
340         if (fd >= 0) {
341                 struct file *file = get_empty_filp();
342 
343                 if (!file) {
344                         put_unused_fd(fd);
345                         fd = -ENFILE;
346                         goto out;
347                 }
348 
349                 sprintf(name, "[%lu]", sock->inode->i_ino);
350                 this.name = name;
351                 this.len = strlen(name);
352                 this.hash = sock->inode->i_ino;
353 
354                 file->f_dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this);
355                 if (!file->f_dentry) {
356                         put_filp(file);
357                         put_unused_fd(fd);
358                         fd = -ENOMEM;
359                         goto out;
360                 }
361                 file->f_dentry->d_op = &sockfs_dentry_operations;
362                 d_add(file->f_dentry, sock->inode);
363                 file->f_vfsmnt = mntget(sock_mnt);
364 
365                 sock->file = file;
366                 file->f_op = sock->inode->i_fop = &socket_file_ops;
367                 file->f_mode = 3;
368                 file->f_flags = O_RDWR;
369                 file->f_pos = 0;
370                 fd_install(fd, file);
371         }
372 
373 out:
374         return fd;
375 }
376 
377 extern __inline__ struct socket *socki_lookup(struct inode *inode)
378 {
379         return &inode->u.socket_i;
380 }
381 
382 /**
383  *      sockfd_lookup   -       Go from a file number to its socket slot
384  *      @fd: file handle
385  *      @err: pointer to an error code return
386  *
387  *      The file handle passed in is locked and the socket it is bound
388  *      too is returned. If an error occurs the err pointer is overwritten
389  *      with a negative errno code and NULL is returned. The function checks
390  *      for both invalid handles and passing a handle which is not a socket.
391  *
392  *      On a success the socket object pointer is returned.
393  */
394 
395 struct socket *sockfd_lookup(int fd, int *err)
396 {
397         struct file *file;
398         struct inode *inode;
399         struct socket *sock;
400 
401         if (!(file = fget(fd)))
402         {
403                 *err = -EBADF;
404                 return NULL;
405         }
406 
407         inode = file->f_dentry->d_inode;
408         if (!inode->i_sock || !(sock = socki_lookup(inode)))
409         {
410                 *err = -ENOTSOCK;
411                 fput(file);
412                 return NULL;
413         }
414 
415         if (sock->file != file) {
416                 printk(KERN_ERR "socki_lookup: socket file changed!\n");
417                 sock->file = file;
418         }
419         return sock;
420 }
421 
422 extern __inline__ void sockfd_put(struct socket *sock)
423 {
424         fput(sock->file);
425 }
426 
427 /**
428  *      sock_alloc      -       allocate a socket
429  *      
430  *      Allocate a new inode and socket object. The two are bound together
431  *      and initialised. The socket is then returned. If we are out of inodes
432  *      NULL is returned.
433  */
434 
435 struct socket *sock_alloc(void)
436 {
437         struct inode * inode;
438         struct socket * sock;
439 
440         inode = get_empty_inode();
441         if (!inode)
442                 return NULL;
443 
444         inode->i_sb = sock_mnt->mnt_sb;
445         sock = socki_lookup(inode);
446 
447         inode->i_mode = S_IFSOCK|S_IRWXUGO;
448         inode->i_sock = 1;
449         inode->i_uid = current->fsuid;
450         inode->i_gid = current->fsgid;
451 
452         sock->inode = inode;
453         init_waitqueue_head(&sock->wait);
454         sock->fasync_list = NULL;
455         sock->state = SS_UNCONNECTED;
456         sock->flags = 0;
457         sock->ops = NULL;
458         sock->sk = NULL;
459         sock->file = NULL;
460 
461         sockets_in_use[smp_processor_id()].counter++;
462         return sock;
463 }
464 
/*
 *	In theory you can't get an open on this inode, but /proc provides
 *	a back door. Keep it shut, otherwise you'll let the creepy crawlies
 *	in: every open attempt is refused with -ENXIO. Both arguments are
 *	ignored.
 */

static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
{
	const int refused = -ENXIO;

	return refused;
}
475 
476 /**
477  *      sock_release    -       close a socket
478  *      @sock: socket to close
479  *
480  *      The socket is released from the protocol stack if it has a release
481  *      callback, and the inode is then released if the socket is bound to
482  *      an inode not a file. 
483  */
484  
485 void sock_release(struct socket *sock)
486 {
487         if (sock->ops) 
488                 sock->ops->release(sock);
489 
490         if (sock->fasync_list)
491                 printk(KERN_ERR "sock_release: fasync list not empty!\n");
492 
493         sockets_in_use[smp_processor_id()].counter--;
494         if (!sock->file) {
495                 iput(sock->inode);
496                 return;
497         }
498         sock->file=NULL;
499 }
500 
501 int sock_sendmsg(struct socket *sock, struct msghdr *msg, int size)
502 {
503         int err;
504         struct scm_cookie scm;
505 
506         err = scm_send(sock, msg, &scm);
507         if (err >= 0) {
508                 err = sock->ops->sendmsg(sock, msg, size, &scm);
509                 scm_destroy(&scm);
510         }
511         return err;
512 }
513 
514 int sock_recvmsg(struct socket *sock, struct msghdr *msg, int size, int flags)
515 {
516         struct scm_cookie scm;
517 
518         memset(&scm, 0, sizeof(scm));
519 
520         size = sock->ops->recvmsg(sock, msg, size, flags, &scm);
521         if (size >= 0)
522                 scm_recv(sock, msg, &scm, flags);
523 
524         return size;
525 }
526 
527 
528 /*
529  *      Sockets are not seekable.
530  */
531 
532 static loff_t sock_lseek(struct file *file, loff_t offset, int whence)
533 {
534         return -ESPIPE;
535 }
536 
537 /*
538  *      Read data from a socket. ubuf is a user mode pointer. We make sure the user
539  *      area ubuf...ubuf+size-1 is writable before asking the protocol.
540  */
541 
542 static ssize_t sock_read(struct file *file, char *ubuf,
543                          size_t size, loff_t *ppos)
544 {
545         struct socket *sock;
546         struct iovec iov;
547         struct msghdr msg;
548         int flags;
549 
550         if (ppos != &file->f_pos)
551                 return -ESPIPE;
552         if (size==0)            /* Match SYS5 behaviour */
553                 return 0;
554 
555         sock = socki_lookup(file->f_dentry->d_inode); 
556 
557         msg.msg_name=NULL;
558         msg.msg_namelen=0;
559         msg.msg_iov=&iov;
560         msg.msg_iovlen=1;
561         msg.msg_control=NULL;
562         msg.msg_controllen=0;
563         iov.iov_base=ubuf;
564         iov.iov_len=size;
565         flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
566 
567         return sock_recvmsg(sock, &msg, size, flags);
568 }
569 
570 
571 /*
572  *      Write data to a socket. We verify that the user area ubuf..ubuf+size-1
573  *      is readable by the user process.
574  */
575 
576 static ssize_t sock_write(struct file *file, const char *ubuf,
577                           size_t size, loff_t *ppos)
578 {
579         struct socket *sock;
580         struct msghdr msg;
581         struct iovec iov;
582         
583         if (ppos != &file->f_pos)
584                 return -ESPIPE;
585         if(size==0)             /* Match SYS5 behaviour */
586                 return 0;
587 
588         sock = socki_lookup(file->f_dentry->d_inode); 
589 
590         msg.msg_name=NULL;
591         msg.msg_namelen=0;
592         msg.msg_iov=&iov;
593         msg.msg_iovlen=1;
594         msg.msg_control=NULL;
595         msg.msg_controllen=0;
596         msg.msg_flags=!(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
597         if (sock->type == SOCK_SEQPACKET)
598                 msg.msg_flags |= MSG_EOR;
599         iov.iov_base=(void *)ubuf;
600         iov.iov_len=size;
601         
602         return sock_sendmsg(sock, &msg, size);
603 }
604 
605 int sock_readv_writev(int type, struct inode * inode, struct file * file,
606                       const struct iovec * iov, long count, long size)
607 {
608         struct msghdr msg;
609         struct socket *sock;
610 
611         sock = socki_lookup(inode);
612 
613         msg.msg_name = NULL;
614         msg.msg_namelen = 0;
615         msg.msg_control = NULL;
616         msg.msg_controllen = 0;
617         msg.msg_iov = (struct iovec *) iov;
618         msg.msg_iovlen = count;
619         msg.msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
620 
621         /* read() does a VERIFY_WRITE */
622         if (type == VERIFY_WRITE)
623                 return sock_recvmsg(sock, &msg, size, msg.msg_flags);
624 
625         if (sock->type == SOCK_SEQPACKET)
626                 msg.msg_flags |= MSG_EOR;
627 
628         return sock_sendmsg(sock, &msg, size);
629 }
630 
631 static ssize_t sock_readv(struct file *file, const struct iovec *vector,
632                           unsigned long count, loff_t *ppos)
633 {
634         size_t tot_len = 0;
635         int i;
636         for (i = 0 ; i < count ; i++)
637                 tot_len += vector[i].iov_len;
638         return sock_readv_writev(VERIFY_WRITE, file->f_dentry->d_inode,
639                                  file, vector, count, tot_len);
640 }
641         
642 static ssize_t sock_writev(struct file *file, const struct iovec *vector,
643                            unsigned long count, loff_t *ppos)
644 {
645         size_t tot_len = 0;
646         int i;
647         for (i = 0 ; i < count ; i++)
648                 tot_len += vector[i].iov_len;
649         return sock_readv_writev(VERIFY_READ, file->f_dentry->d_inode,
650                                  file, vector, count, tot_len);
651 }
652 
653 /*
654  *      With an ioctl arg may well be a user mode pointer, but we don't know what to do
655  *      with it - that's up to the protocol still.
656  */
657 
658 int sock_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
659            unsigned long arg)
660 {
661         struct socket *sock;
662         int err;
663 
664         unlock_kernel();
665         sock = socki_lookup(inode);
666         err = sock->ops->ioctl(sock, cmd, arg);
667         lock_kernel();
668 
669         return err;
670 }
671 
672 
673 /* No kernel lock held - perfect */
674 static unsigned int sock_poll(struct file *file, poll_table * wait)
675 {
676         struct socket *sock;
677 
678         /*
679          *      We can't return errors to poll, so it's either yes or no. 
680          */
681         sock = socki_lookup(file->f_dentry->d_inode);
682         return sock->ops->poll(file, sock, wait);
683 }
684 
685 static int sock_mmap(struct file * file, struct vm_area_struct * vma)
686 {
687         struct socket *sock = socki_lookup(file->f_dentry->d_inode);
688 
689         return sock->ops->mmap(file, sock, vma);
690 }
691 
692 int sock_close(struct inode *inode, struct file *filp)
693 {
694         /*
695          *      It was possible the inode is NULL we were 
696          *      closing an unfinished socket. 
697          */
698 
699         if (!inode)
700         {
701                 printk(KERN_DEBUG "sock_close: NULL inode\n");
702                 return 0;
703         }
704         sock_fasync(-1, filp, 0);
705         sock_release(socki_lookup(inode));
706         return 0;
707 }
708 
709 /*
710  *      Update the socket async list
711  *
712  *      Fasync_list locking strategy.
713  *
714  *      1. fasync_list is modified only under process context socket lock
715  *         i.e. under semaphore.
716  *      2. fasync_list is used under read_lock(&sk->callback_lock)
717  *         or under socket lock.
718  *      3. fasync_list can be used from softirq context, so that
719  *         modification under socket lock have to be enhanced with
720  *         write_lock_bh(&sk->callback_lock).
721  *                                                      --ANK (990710)
722  */
723 
724 static int sock_fasync(int fd, struct file *filp, int on)
725 {
726         struct fasync_struct *fa, *fna=NULL, **prev;
727         struct socket *sock;
728         struct sock *sk;
729 
730         if (on)
731         {
732                 fna=(struct fasync_struct *)kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
733                 if(fna==NULL)
734                         return -ENOMEM;
735         }
736 
737 
738         sock = socki_lookup(filp->f_dentry->d_inode);
739         
740         if ((sk=sock->sk) == NULL)
741                 return -EINVAL;
742 
743         lock_sock(sk);
744 
745         prev=&(sock->fasync_list);
746 
747         for (fa=*prev; fa!=NULL; prev=&fa->fa_next,fa=*prev)
748                 if (fa->fa_file==filp)
749                         break;
750 
751         if(on)
752         {
753                 if(fa!=NULL)
754                 {
755                         write_lock_bh(&sk->callback_lock);
756                         fa->fa_fd=fd;
757                         write_unlock_bh(&sk->callback_lock);
758 
759                         kfree(fna);
760                         goto out;
761                 }
762                 fna->fa_file=filp;
763                 fna->fa_fd=fd;
764                 fna->magic=FASYNC_MAGIC;
765                 fna->fa_next=sock->fasync_list;
766                 write_lock_bh(&sk->callback_lock);
767                 sock->fasync_list=fna;
768                 write_unlock_bh(&sk->callback_lock);
769         }
770         else
771         {
772                 if (fa!=NULL)
773                 {
774                         write_lock_bh(&sk->callback_lock);
775                         *prev=fa->fa_next;
776                         write_unlock_bh(&sk->callback_lock);
777                         kfree(fa);
778                 }
779         }
780 
781 out:
782         release_sock(sock->sk);
783         return 0;
784 }
785 
786 /* This function may be called only under socket lock or callback_lock */
787 
788 int sock_wake_async(struct socket *sock, int how, int band)
789 {
790         if (!sock || !sock->fasync_list)
791                 return -1;
792         switch (how)
793         {
794         case 1:
795                 
796                 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
797                         break;
798                 goto call_kill;
799         case 2:
800                 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
801                         break;
802                 /* fall through */
803         case 0:
804         call_kill:
805                 __kill_fasync(sock->fasync_list, SIGIO, band);
806                 break;
807         case 3:
808                 __kill_fasync(sock->fasync_list, SIGURG, band);
809         }
810         return 0;
811 }
812 
813 
814 int sock_create(int family, int type, int protocol, struct socket **res)
815 {
816         int i;
817         struct socket *sock;
818 
819         /*
820          *      Check protocol is in range
821          */
822         if(family<0 || family>=NPROTO)
823                 return -EAFNOSUPPORT;
824 
825         /* Compatibility.
826 
827            This uglymoron is moved from INET layer to here to avoid
828            deadlock in module load.
829          */
830         if (family == PF_INET && type == SOCK_PACKET) {
831                 static int warned; 
832                 if (!warned) {
833                         warned = 1;
834                         printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n", current->comm);
835                 }
836                 family = PF_PACKET;
837         }
838                 
839 #if defined(CONFIG_KMOD) && defined(CONFIG_NET)
840         /* Attempt to load a protocol module if the find failed. 
841          * 
842          * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user 
843          * requested real, full-featured networking support upon configuration.
844          * Otherwise module support will break!
845          */
846         if (net_families[family]==NULL)
847         {
848                 char module_name[30];
849                 sprintf(module_name,"net-pf-%d",family);
850                 request_module(module_name);
851         }
852 #endif
853 
854         net_family_read_lock();
855         if (net_families[family] == NULL) {
856                 i = -EAFNOSUPPORT;
857                 goto out;
858         }
859 
860 /*
861  *      Allocate the socket and allow the family to set things up. if
862  *      the protocol is 0, the family is instructed to select an appropriate
863  *      default.
864  */
865 
866         if (!(sock = sock_alloc())) 
867         {
868                 printk(KERN_WARNING "socket: no more sockets\n");
869                 i = -ENFILE;            /* Not exactly a match, but its the
870                                            closest posix thing */
871                 goto out;
872         }
873 
874         sock->type  = type;
875 
876         if ((i = net_families[family]->create(sock, protocol)) < 0) 
877         {
878                 sock_release(sock);
879                 goto out;
880         }
881 
882         *res = sock;
883 
884 out:
885         net_family_read_unlock();
886         return i;
887 }
888 
889 asmlinkage long sys_socket(int family, int type, int protocol)
890 {
891         int retval;
892         struct socket *sock;
893 
894         retval = sock_create(family, type, protocol, &sock);
895         if (retval < 0)
896                 goto out;
897 
898         retval = sock_map_fd(sock);
899         if (retval < 0)
900                 goto out_release;
901 
902 out:
903         /* It may be already another descriptor 8) Not kernel problem. */
904         return retval;
905 
906 out_release:
907         sock_release(sock);
908         return retval;
909 }
910 
911 /*
912  *      Create a pair of connected sockets.
913  */
914 
915 asmlinkage long sys_socketpair(int family, int type, int protocol, int usockvec[2])
916 {
917         struct socket *sock1, *sock2;
918         int fd1, fd2, err;
919 
920         /*
921          * Obtain the first socket and check if the underlying protocol
922          * supports the socketpair call.
923          */
924 
925         err = sock_create(family, type, protocol, &sock1);
926         if (err < 0)
927                 goto out;
928 
929         err = sock_create(family, type, protocol, &sock2);
930         if (err < 0)
931                 goto out_release_1;
932 
933         err = sock1->ops->socketpair(sock1, sock2);
934         if (err < 0) 
935                 goto out_release_both;
936 
937         fd1 = fd2 = -1;
938 
939         err = sock_map_fd(sock1);
940         if (err < 0)
941                 goto out_release_both;
942         fd1 = err;
943 
944         err = sock_map_fd(sock2);
945         if (err < 0)
946                 goto out_close_1;
947         fd2 = err;
948 
949         /* fd1 and fd2 may be already another descriptors.
950          * Not kernel problem.
951          */
952 
953         err = put_user(fd1, &usockvec[0]); 
954         if (!err)
955                 err = put_user(fd2, &usockvec[1]);
956         if (!err)
957                 return 0;
958 
959         sys_close(fd2);
960         sys_close(fd1);
961         return err;
962 
963 out_close_1:
964         sock_release(sock2);
965         sys_close(fd1);
966         return err;
967 
968 out_release_both:
969         sock_release(sock2);
970 out_release_1:
971         sock_release(sock1);
972 out:
973         return err;
974 }
975 
976 
977 /*
978  *      Bind a name to a socket. Nothing much to do here since it's
979  *      the protocol's responsibility to handle the local address.
980  *
981  *      We move the socket address to kernel space before we call
982  *      the protocol layer (having also checked the address is ok).
983  */
984 
985 asmlinkage long sys_bind(int fd, struct sockaddr *umyaddr, int addrlen)
986 {
987         struct socket *sock;
988         char address[MAX_SOCK_ADDR];
989         int err;
990 
991         if((sock = sockfd_lookup(fd,&err))!=NULL)
992         {
993                 if((err=move_addr_to_kernel(umyaddr,addrlen,address))>=0)
994                         err = sock->ops->bind(sock, (struct sockaddr *)address, addrlen);
995                 sockfd_put(sock);
996         }                       
997         return err;
998 }
999 
1000 
1001 /*
1002  *      Perform a listen. Basically, we allow the protocol to do anything
1003  *      necessary for a listen, and if that works, we mark the socket as
1004  *      ready for listening.
1005  */
1006 
1007 asmlinkage long sys_listen(int fd, int backlog)
1008 {
1009         struct socket *sock;
1010         int err;
1011         
1012         if ((sock = sockfd_lookup(fd, &err)) != NULL) {
1013                 if ((unsigned) backlog > SOMAXCONN)
1014                         backlog = SOMAXCONN;
1015                 err=sock->ops->listen(sock, backlog);
1016                 sockfd_put(sock);
1017         }
1018         return err;
1019 }
1020 
1021 
1022 /*
1023  *      For accept, we attempt to create a new socket, set up the link
1024  *      with the client, wake up the client, then return the new
1025  *      connected fd. We collect the address of the connector in kernel
1026  *      space and move it to user at the very end. This is unclean because
1027  *      we open the socket then return an error.
1028  *
1029  *      1003.1g adds the ability to recvmsg() to query connection pending
1030  *      status to recvmsg. We need to add that support in a way thats
1031  *      clean when we restucture accept also.
1032  */
1033 
1034 asmlinkage long sys_accept(int fd, struct sockaddr *upeer_sockaddr, int *upeer_addrlen)
1035 {
1036         struct socket *sock, *newsock;
1037         int err, len;
1038         char address[MAX_SOCK_ADDR];
1039 
1040         sock = sockfd_lookup(fd, &err);
1041         if (!sock)
1042                 goto out;
1043 
1044         err = -EMFILE;
1045         if (!(newsock = sock_alloc())) 
1046                 goto out_put;
1047 
1048         newsock->type = sock->type;
1049         newsock->ops = sock->ops;
1050 
1051         err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1052         if (err < 0)
1053                 goto out_release;
1054 
1055         if (upeer_sockaddr) {
1056                 if(newsock->ops->getname(newsock, (struct sockaddr *)address, &len, 2)<0) {
1057                         err = -ECONNABORTED;
1058                         goto out_release;
1059                 }
1060                 err = move_addr_to_user(address, len, upeer_sockaddr, upeer_addrlen);
1061                 if (err < 0)
1062                         goto out_release;
1063         }
1064 
1065         /* File flags are not inherited via accept() unlike another OSes. */
1066 
1067         if ((err = sock_map_fd(newsock)) < 0)
1068                 goto out_release;
1069 
1070 out_put:
1071         sockfd_put(sock);
1072 out:
1073         return err;
1074 
1075 out_release:
1076         sock_release(newsock);
1077         goto out_put;
1078 }
1079 
1080 
1081 /*
1082  *      Attempt to connect to a socket with the server address.  The address
1083  *      is in user space so we verify it is OK and move it to kernel space.
1084  *
1085  *      For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1086  *      break bindings
1087  *
1088  *      NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1089  *      other SEQPACKET protocols that take time to connect() as it doesn't
1090  *      include the -EINPROGRESS status for such sockets.
1091  */
1092 
1093 asmlinkage long sys_connect(int fd, struct sockaddr *uservaddr, int addrlen)
1094 {
1095         struct socket *sock;
1096         char address[MAX_SOCK_ADDR];
1097         int err;
1098 
1099         sock = sockfd_lookup(fd, &err);
1100         if (!sock)
1101                 goto out;
1102         err = move_addr_to_kernel(uservaddr, addrlen, address);
1103         if (err < 0)
1104                 goto out_put;
1105         err = sock->ops->connect(sock, (struct sockaddr *) address, addrlen,
1106                                  sock->file->f_flags);
1107 out_put:
1108         sockfd_put(sock);
1109 out:
1110         return err;
1111 }
1112 
1113 /*
1114  *      Get the local address ('name') of a socket object. Move the obtained
1115  *      name to user space.
1116  */
1117 
1118 asmlinkage long sys_getsockname(int fd, struct sockaddr *usockaddr, int *usockaddr_len)
1119 {
1120         struct socket *sock;
1121         char address[MAX_SOCK_ADDR];
1122         int len, err;
1123         
1124         sock = sockfd_lookup(fd, &err);
1125         if (!sock)
1126                 goto out;
1127         err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0);
1128         if (err)
1129                 goto out_put;
1130         err = move_addr_to_user(address, len, usockaddr, usockaddr_len);
1131 
1132 out_put:
1133         sockfd_put(sock);
1134 out:
1135         return err;
1136 }
1137 
1138 /*
1139  *      Get the remote address ('name') of a socket object. Move the obtained
1140  *      name to user space.
1141  */
1142 
1143 asmlinkage long sys_getpeername(int fd, struct sockaddr *usockaddr, int *usockaddr_len)
1144 {
1145         struct socket *sock;
1146         char address[MAX_SOCK_ADDR];
1147         int len, err;
1148 
1149         if ((sock = sockfd_lookup(fd, &err))!=NULL)
1150         {
1151                 err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 1);
1152                 if (!err)
1153                         err=move_addr_to_user(address,len, usockaddr, usockaddr_len);
1154                 sockfd_put(sock);
1155         }
1156         return err;
1157 }
1158 
1159 /*
1160  *      Send a datagram to a given address. We move the address into kernel
1161  *      space and check the user space data area is readable before invoking
1162  *      the protocol.
1163  */
1164 
1165 asmlinkage long sys_sendto(int fd, void * buff, size_t len, unsigned flags,
1166                            struct sockaddr *addr, int addr_len)
1167 {
1168         struct socket *sock;
1169         char address[MAX_SOCK_ADDR];
1170         int err;
1171         struct msghdr msg;
1172         struct iovec iov;
1173         
1174         sock = sockfd_lookup(fd, &err);
1175         if (!sock)
1176                 goto out;
1177         iov.iov_base=buff;
1178         iov.iov_len=len;
1179         msg.msg_name=NULL;
1180         msg.msg_iov=&iov;
1181         msg.msg_iovlen=1;
1182         msg.msg_control=NULL;
1183         msg.msg_controllen=0;
1184         msg.msg_namelen=addr_len;
1185         if(addr)
1186         {
1187                 err = move_addr_to_kernel(addr, addr_len, address);
1188                 if (err < 0)
1189                         goto out_put;
1190                 msg.msg_name=address;
1191         }
1192         if (sock->file->f_flags & O_NONBLOCK)
1193                 flags |= MSG_DONTWAIT;
1194         msg.msg_flags = flags;
1195         err = sock_sendmsg(sock, &msg, len);
1196 
1197 out_put:                
1198         sockfd_put(sock);
1199 out:
1200         return err;
1201 }
1202 
1203 /*
1204  *      Send a datagram down a socket. 
1205  */
1206 
1207 asmlinkage long sys_send(int fd, void * buff, size_t len, unsigned flags)
1208 {
1209         return sys_sendto(fd, buff, len, flags, NULL, 0);
1210 }
1211 
1212 /*
1213  *      Receive a frame from the socket and optionally record the address of the 
1214  *      sender. We verify the buffers are writable and if needed move the
1215  *      sender address from kernel to user space.
1216  */
1217 
1218 asmlinkage long sys_recvfrom(int fd, void * ubuf, size_t size, unsigned flags,
1219                              struct sockaddr *addr, int *addr_len)
1220 {
1221         struct socket *sock;
1222         struct iovec iov;
1223         struct msghdr msg;
1224         char address[MAX_SOCK_ADDR];
1225         int err,err2;
1226 
1227         sock = sockfd_lookup(fd, &err);
1228         if (!sock)
1229                 goto out;
1230 
1231         msg.msg_control=NULL;
1232         msg.msg_controllen=0;
1233         msg.msg_iovlen=1;
1234         msg.msg_iov=&iov;
1235         iov.iov_len=size;
1236         iov.iov_base=ubuf;
1237         msg.msg_name=address;
1238         msg.msg_namelen=MAX_SOCK_ADDR;
1239         if (sock->file->f_flags & O_NONBLOCK)
1240                 flags |= MSG_DONTWAIT;
1241         err=sock_recvmsg(sock, &msg, size, flags);
1242 
1243         if(err >= 0 && addr != NULL && msg.msg_namelen)
1244         {
1245                 err2=move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
1246                 if(err2<0)
1247                         err=err2;
1248         }
1249         sockfd_put(sock);                       
1250 out:
1251         return err;
1252 }
1253 
1254 /*
1255  *      Receive a datagram from a socket. 
1256  */
1257 
1258 asmlinkage long sys_recv(int fd, void * ubuf, size_t size, unsigned flags)
1259 {
1260         return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1261 }
1262 
1263 /*
1264  *      Set a socket option. Because we don't know the option lengths we have
1265  *      to pass the user mode parameter for the protocols to sort out.
1266  */
1267 
1268 asmlinkage long sys_setsockopt(int fd, int level, int optname, char *optval, int optlen)
1269 {
1270         int err;
1271         struct socket *sock;
1272         
1273         if ((sock = sockfd_lookup(fd, &err))!=NULL)
1274         {
1275                 if (level == SOL_SOCKET)
1276                         err=sock_setsockopt(sock,level,optname,optval,optlen);
1277                 else
1278                         err=sock->ops->setsockopt(sock, level, optname, optval, optlen);
1279                 sockfd_put(sock);
1280         }
1281         return err;
1282 }
1283 
1284 /*
1285  *      Get a socket option. Because we don't know the option lengths we have
1286  *      to pass a user mode parameter for the protocols to sort out.
1287  */
1288 
1289 asmlinkage long sys_getsockopt(int fd, int level, int optname, char *optval, int *optlen)
1290 {
1291         int err;
1292         struct socket *sock;
1293 
1294         if ((sock = sockfd_lookup(fd, &err))!=NULL)
1295         {
1296                 if (level == SOL_SOCKET)
1297                         err=sock_getsockopt(sock,level,optname,optval,optlen);
1298                 else
1299                         err=sock->ops->getsockopt(sock, level, optname, optval, optlen);
1300                 sockfd_put(sock);
1301         }
1302         return err;
1303 }
1304 
1305 
1306 /*
1307  *      Shutdown a socket.
1308  */
1309 
1310 asmlinkage long sys_shutdown(int fd, int how)
1311 {
1312         int err;
1313         struct socket *sock;
1314 
1315         if ((sock = sockfd_lookup(fd, &err))!=NULL)
1316         {
1317                 err=sock->ops->shutdown(sock, how);
1318                 sockfd_put(sock);
1319         }
1320         return err;
1321 }
1322 
1323 /*
1324  *      BSD sendmsg interface
1325  */
1326 
1327 asmlinkage long sys_sendmsg(int fd, struct msghdr *msg, unsigned flags)
1328 {
1329         struct socket *sock;
1330         char address[MAX_SOCK_ADDR];
1331         struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
1332         unsigned char ctl[sizeof(struct cmsghdr) + 20]; /* 20 is size of ipv6_pktinfo */
1333         unsigned char *ctl_buf = ctl;
1334         struct msghdr msg_sys;
1335         int err, ctl_len, iov_size, total_len;
1336         
1337         err = -EFAULT;
1338         if (copy_from_user(&msg_sys,msg,sizeof(struct msghdr)))
1339                 goto out; 
1340 
1341         sock = sockfd_lookup(fd, &err);
1342         if (!sock) 
1343                 goto out;
1344 
1345         /* do not move before msg_sys is valid */
1346         err = -EINVAL;
1347         if (msg_sys.msg_iovlen > UIO_MAXIOV)
1348                 goto out_put;
1349 
1350         /* Check whether to allocate the iovec area*/
1351         err = -ENOMEM;
1352         iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1353         if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1354                 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1355                 if (!iov)
1356                         goto out_put;
1357         }
1358 
1359         /* This will also move the address data into kernel space */
1360         err = verify_iovec(&msg_sys, iov, address, VERIFY_READ);
1361         if (err < 0) 
1362                 goto out_freeiov;
1363         total_len = err;
1364 
1365         err = -ENOBUFS;
1366 
1367         if (msg_sys.msg_controllen > INT_MAX)
1368                 goto out_freeiov;
1369         ctl_len = msg_sys.msg_controllen; 
1370         if (ctl_len) 
1371         {
1372                 if (ctl_len > sizeof(ctl))
1373                 {
1374                         ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
1375                         if (ctl_buf == NULL) 
1376                                 goto out_freeiov;
1377                 }
1378                 err = -EFAULT;
1379                 if (copy_from_user(ctl_buf, msg_sys.msg_control, ctl_len))
1380                         goto out_freectl;
1381                 msg_sys.msg_control = ctl_buf;
1382         }
1383         msg_sys.msg_flags = flags;
1384 
1385         if (sock->file->f_flags & O_NONBLOCK)
1386                 msg_sys.msg_flags |= MSG_DONTWAIT;
1387         err = sock_sendmsg(sock, &msg_sys, total_len);
1388 
1389 out_freectl:
1390         if (ctl_buf != ctl)    
1391                 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1392 out_freeiov:
1393         if (iov != iovstack)
1394                 sock_kfree_s(sock->sk, iov, iov_size);
1395 out_put:
1396         sockfd_put(sock);
1397 out:       
1398         return err;
1399 }
1400 
1401 /*
1402  *      BSD recvmsg interface
1403  */
1404 
1405 asmlinkage long sys_recvmsg(int fd, struct msghdr *msg, unsigned int flags)
1406 {
1407         struct socket *sock;
1408         struct iovec iovstack[UIO_FASTIOV];
1409         struct iovec *iov=iovstack;
1410         struct msghdr msg_sys;
1411         unsigned long cmsg_ptr;
1412         int err, iov_size, total_len, len;
1413 
1414         /* kernel mode address */
1415         char addr[MAX_SOCK_ADDR];
1416 
1417         /* user mode address pointers */
1418         struct sockaddr *uaddr;
1419         int *uaddr_len;
1420         
1421         err=-EFAULT;
1422         if (copy_from_user(&msg_sys,msg,sizeof(struct msghdr)))
1423                 goto out;
1424 
1425         sock = sockfd_lookup(fd, &err);
1426         if (!sock)
1427                 goto out;
1428 
1429         err = -EINVAL;
1430         if (msg_sys.msg_iovlen > UIO_MAXIOV)
1431                 goto out_put;
1432         
1433         /* Check whether to allocate the iovec area*/
1434         err = -ENOMEM;
1435         iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1436         if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1437                 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1438                 if (!iov)
1439                         goto out_put;
1440         }
1441 
1442         /*
1443          *      Save the user-mode address (verify_iovec will change the
1444          *      kernel msghdr to use the kernel address space)
1445          */
1446          
1447         uaddr = msg_sys.msg_name;
1448         uaddr_len = &msg->msg_namelen;
1449         err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1450         if (err < 0)
1451                 goto out_freeiov;
1452         total_len=err;
1453 
1454         cmsg_ptr = (unsigned long)msg_sys.msg_control;
1455         msg_sys.msg_flags = 0;
1456         
1457         if (sock->file->f_flags & O_NONBLOCK)
1458                 flags |= MSG_DONTWAIT;
1459         err = sock_recvmsg(sock, &msg_sys, total_len, flags);
1460         if (err < 0)
1461                 goto out_freeiov;
1462         len = err;
1463 
1464         if (uaddr != NULL && msg_sys.msg_namelen) {
1465                 err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr, uaddr_len);
1466                 if (err < 0)
1467                         goto out_freeiov;
1468         }
1469         err = __put_user(msg_sys.msg_flags, &msg->msg_flags);
1470         if (err)
1471                 goto out_freeiov;
1472         err = __put_user((unsigned long)msg_sys.msg_control-cmsg_ptr, 
1473                                                          &msg->msg_controllen);
1474         if (err)
1475                 goto out_freeiov;
1476         err = len;
1477 
1478 out_freeiov:
1479         if (iov != iovstack)
1480                 sock_kfree_s(sock->sk, iov, iov_size);
1481 out_put:
1482         sockfd_put(sock);
1483 out:
1484         return err;
1485 }
1486 
1487 
1488 /*
1489  *      Perform a file control on a socket file descriptor.
1490  *
1491  *      Doesn't acquire a fd lock, because no network fcntl
1492  *      function sleeps currently.
1493  */
1494 
1495 int sock_fcntl(struct file *filp, unsigned int cmd, unsigned long arg)
1496 {
1497         struct socket *sock;
1498 
1499         sock = socki_lookup (filp->f_dentry->d_inode);
1500         if (sock && sock->ops)
1501                 return sock_no_fcntl(sock, cmd, arg);
1502         return(-EINVAL);
1503 }
1504 
1505 /* Argument list sizes for sys_socketcall */
1506 #define AL(x) ((x) * sizeof(unsigned long))
1507 static unsigned char nargs[18]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
1508                                 AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
1509                                 AL(6),AL(2),AL(5),AL(5),AL(3),AL(3)};
1510 #undef AL
1511 
1512 /*
1513  *      System call vectors. 
1514  *
1515  *      Argument checking cleaned up. Saved 20% in size.
1516  *  This function doesn't need to set the kernel lock because
1517  *  it is set by the callees. 
1518  */
1519 
1520 asmlinkage long sys_socketcall(int call, unsigned long *args)
1521 {
1522         unsigned long a[6];
1523         unsigned long a0,a1;
1524         int err;
1525 
1526         if(call<1||call>SYS_RECVMSG)
1527                 return -EINVAL;
1528 
1529         /* copy_from_user should be SMP safe. */
1530         if (copy_from_user(a, args, nargs[call]))
1531                 return -EFAULT;
1532                 
1533         a0=a[0];
1534         a1=a[1];
1535         
1536         switch(call) 
1537         {
1538                 case SYS_SOCKET:
1539                         err = sys_socket(a0,a1,a[2]);
1540                         break;
1541                 case SYS_BIND:
1542                         err = sys_bind(a0,(struct sockaddr *)a1, a[2]);
1543                         break;
1544                 case SYS_CONNECT:
1545                         err = sys_connect(a0, (struct sockaddr *)a1, a[2]);
1546                         break;
1547                 case SYS_LISTEN:
1548                         err = sys_listen(a0,a1);
1549                         break;
1550                 case SYS_ACCEPT:
1551                         err = sys_accept(a0,(struct sockaddr *)a1, (int *)a[2]);
1552                         break;
1553                 case SYS_GETSOCKNAME:
1554                         err = sys_getsockname(a0,(struct sockaddr *)a1, (int *)a[2]);
1555                         break;
1556                 case SYS_GETPEERNAME:
1557                         err = sys_getpeername(a0, (struct sockaddr *)a1, (int *)a[2]);
1558                         break;
1559                 case SYS_SOCKETPAIR:
1560                         err = sys_socketpair(a0,a1, a[2], (int *)a[3]);
1561                         break;
1562                 case SYS_SEND:
1563                         err = sys_send(a0, (void *)a1, a[2], a[3]);
1564                         break;
1565                 case SYS_SENDTO:
1566                         err = sys_sendto(a0,(void *)a1, a[2], a[3],
1567                                          (struct sockaddr *)a[4], a[5]);
1568                         break;
1569                 case SYS_RECV:
1570                         err = sys_recv(a0, (void *)a1, a[2], a[3]);
1571                         break;
1572                 case SYS_RECVFROM:
1573                         err = sys_recvfrom(a0, (void *)a1, a[2], a[3],
1574                                            (struct sockaddr *)a[4], (int *)a[5]);
1575                         break;
1576                 case SYS_SHUTDOWN:
1577                         err = sys_shutdown(a0,a1);
1578                         break;
1579                 case SYS_SETSOCKOPT:
1580                         err = sys_setsockopt(a0, a1, a[2], (char *)a[3], a[4]);
1581                         break;
1582                 case SYS_GETSOCKOPT:
1583                         err = sys_getsockopt(a0, a1, a[2], (char *)a[3], (int *)a[4]);
1584                         break;
1585                 case SYS_SENDMSG:
1586                         err = sys_sendmsg(a0, (struct msghdr *) a1, a[2]);
1587                         break;
1588                 case SYS_RECVMSG:
1589                         err = sys_recvmsg(a0, (struct msghdr *) a1, a[2]);
1590                         break;
1591                 default:
1592                         err = -EINVAL;
1593                         break;
1594         }
1595         return err;
1596 }
1597 
1598 /*
1599  *      This function is called by a protocol handler that wants to
1600  *      advertise its address family, and have it linked into the
1601  *      SOCKET module.
1602  */
1603 
1604 int sock_register(struct net_proto_family *ops)
1605 {
1606         int err;
1607 
1608         if (ops->family >= NPROTO) {
1609                 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1610                 return -ENOBUFS;
1611         }
1612         net_family_write_lock();
1613         err = -EEXIST;
1614         if (net_families[ops->family] == NULL) {
1615                 net_families[ops->family]=ops;
1616                 err = 0;
1617         }
1618         net_family_write_unlock();
1619         return err;
1620 }
1621 
1622 /*
1623  *      This function is called by a protocol handler that wants to
1624  *      remove its address family, and have it unlinked from the
1625  *      SOCKET module.
1626  */
1627 
1628 int sock_unregister(int family)
1629 {
1630         if (family < 0 || family >= NPROTO)
1631                 return -1;
1632 
1633         net_family_write_lock();
1634         net_families[family]=NULL;
1635         net_family_write_unlock();
1636         return 0;
1637 }
1638 
1639 
1640 extern void sk_init(void);
1641 
1642 #ifdef CONFIG_WAN_ROUTER
1643 extern void wanrouter_init(void);
1644 #endif
1645 
1646 void __init sock_init(void)
1647 {
1648         int i;
1649 
1650         printk(KERN_INFO "Linux NET4.0 for Linux 2.4\n");
1651         printk(KERN_INFO "Based upon Swansea University Computer Society NET3.039\n");
1652 
1653         /*
1654          *      Initialize all address (protocol) families. 
1655          */
1656          
1657         for (i = 0; i < NPROTO; i++) 
1658                 net_families[i] = NULL;
1659 
1660         /*
1661          *      Initialize sock SLAB cache.
1662          */
1663          
1664         sk_init();
1665 
1666 #ifdef SLAB_SKB
1667         /*
1668          *      Initialize skbuff SLAB cache 
1669          */
1670         skb_init();
1671 #endif
1672 
1673         /*
1674          *      Wan router layer. 
1675          */
1676 
1677 #ifdef CONFIG_WAN_ROUTER         
1678         wanrouter_init();
1679 #endif
1680 
1681         /*
1682          *      Initialize the protocols module. 
1683          */
1684 
1685         register_filesystem(&sock_fs_type);
1686         sock_mnt = kern_mount(&sock_fs_type);
1687         /* The real protocol initialization is performed when
1688          *  do_initcalls is run.  
1689          */
1690 
1691 
1692         /*
1693          * The netlink device handler may be needed early.
1694          */
1695 
1696 #ifdef  CONFIG_RTNETLINK
1697         rtnetlink_init();
1698 #endif
1699 #ifdef CONFIG_NETLINK_DEV
1700         init_netlink();
1701 #endif
1702 #ifdef CONFIG_NETFILTER
1703         netfilter_init();
1704 #endif
1705 }
1706 
1707 int socket_get_info(char *buffer, char **start, off_t offset, int length)
1708 {
1709         int len, cpu;
1710         int counter = 0;
1711 
1712         for (cpu=0; cpu<smp_num_cpus; cpu++)
1713                 counter += sockets_in_use[cpu_logical_map(cpu)].counter;
1714 
1715         /* It can be negative, by the way. 8) */
1716         if (counter < 0)
1717                 counter = 0;
1718 
1719         len = sprintf(buffer, "sockets: used %d\n", counter);
1720         if (offset >= len)
1721         {
1722                 *start = buffer;
1723                 return 0;
1724         }
1725         *start = buffer + offset;
1726         len -= offset;
1727         if (len > length)
1728                 len = length;
1729         if (len < 0)
1730                 len = 0;
1731         return len;
1732 }
1733 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.