~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

Linux Cross Reference
Linux/net/packet/af_packet.c

Version: ~ [ 2.4.0 ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /*
  2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
  3  *              operating system.  INET is implemented using the  BSD Socket
  4  *              interface as the means of communication with the user level.
  5  *
  6  *              PACKET - implements raw packet sockets.
  7  *
  8  * Version:     $Id: af_packet.c,v 1.47 2000/12/08 17:15:54 davem Exp $
  9  *
 10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
 11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 12  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
 13  *
 14  * Fixes:       
 15  *              Alan Cox        :       verify_area() now used correctly
 16  *              Alan Cox        :       new skbuff lists, look ma no backlogs!
 17  *              Alan Cox        :       tidied skbuff lists.
 18  *              Alan Cox        :       Now uses generic datagram routines I
 19  *                                      added. Also fixed the peek/read crash
 20  *                                      from all old Linux datagram code.
 21  *              Alan Cox        :       Uses the improved datagram code.
 22  *              Alan Cox        :       Added NULL's for socket options.
 23  *              Alan Cox        :       Re-commented the code.
 24  *              Alan Cox        :       Use new kernel side addressing
 25  *              Rob Janssen     :       Correct MTU usage.
 26  *              Dave Platt      :       Counter leaks caused by incorrect
 27  *                                      interrupt locking and some slightly
 28  *                                      dubious gcc output. Can you read
 29  *                                      compiler: it said _VOLATILE_
 30  *      Richard Kooijman        :       Timestamp fixes.
 31  *              Alan Cox        :       New buffers. Use sk->mac.raw.
 32  *              Alan Cox        :       sendmsg/recvmsg support.
 33  *              Alan Cox        :       Protocol setting support
 34  *      Alexey Kuznetsov        :       Untied from IPv4 stack.
 35  *      Cyrus Durgin            :       Fixed kerneld for kmod.
 36  *      Michal Ostrowski        :       Module initialization cleanup.
 37  *
 38  *              This program is free software; you can redistribute it and/or
 39  *              modify it under the terms of the GNU General Public License
 40  *              as published by the Free Software Foundation; either version
 41  *              2 of the License, or (at your option) any later version.
 42  *
 43  */
 44  
 45 #include <linux/config.h>
 46 #include <linux/types.h>
 47 #include <linux/sched.h>
 48 #include <linux/mm.h>
 49 #include <linux/fcntl.h>
 50 #include <linux/socket.h>
 51 #include <linux/in.h>
 52 #include <linux/inet.h>
 53 #include <linux/netdevice.h>
 54 #include <linux/if_packet.h>
 55 #include <linux/wireless.h>
 56 #include <linux/kmod.h>
 57 #include <net/ip.h>
 58 #include <net/protocol.h>
 59 #include <linux/skbuff.h>
 60 #include <net/sock.h>
 61 #include <linux/errno.h>
 62 #include <linux/timer.h>
 63 #include <asm/system.h>
 64 #include <asm/uaccess.h>
 65 #include <linux/proc_fs.h>
 66 #include <linux/poll.h>
 67 #include <linux/module.h>
 68 #include <linux/init.h>
 69 #include <linux/if_bridge.h>
 70 
 71 #ifdef CONFIG_NET_DIVERT
 72 #include <linux/divert.h>
 73 #endif /* CONFIG_NET_DIVERT */
 74 
 75 #ifdef CONFIG_INET
 76 #include <net/inet_common.h>
 77 #endif
 78 
 79 #ifdef CONFIG_DLCI
 80 extern int dlci_ioctl(unsigned int, void*);
 81 #endif
 82 
 83 #define CONFIG_SOCK_PACKET      1
 84 
 85 /*
 86    Proposed replacement for SIOC{ADD,DEL}MULTI and
 87    IFF_PROMISC, IFF_ALLMULTI flags.
 88 
 89    It is more expensive, but I believe
 90    it is really the correct solution: reentrant, safe and fault tolerant.
 91 
 92    IFF_PROMISC/IFF_ALLMULTI/SIOC{ADD/DEL}MULTI are faked by keeping
 93    reference count and global flag, so that real status is
 94    (gflag|(count != 0)), so that we can use obsolete faulty interface
 95    not harming clever users.
 96  */
 97 #define CONFIG_PACKET_MULTICAST 1
 98 
 99 /*
100    Assumptions:
101    - if device has no dev->hard_header routine, it adds and removes ll header
102      inside itself. In this case ll header is invisible outside of device,
103      but higher levels still should reserve dev->hard_header_len.
104      Some devices are clever enough to reallocate the skb when the header
105      does not fit into the reserved space (tunnel); others are silly
106      (PPP).
107    - packet socket receives packets with pulled ll header,
108      so that SOCK_RAW should push it back.
109 
110 On receive:
111 -----------
112 
113 Incoming, dev->hard_header!=NULL
114    mac.raw -> ll header
115    data    -> data
116 
117 Outgoing, dev->hard_header!=NULL
118    mac.raw -> ll header
119    data    -> ll header
120 
121 Incoming, dev->hard_header==NULL
122    mac.raw -> UNKNOWN position. It very likely points to the ll header.
123               PPP does this, which is wrong because it introduces an asymmetry
124               between the rx and tx paths.
125    data    -> data
126 
127 Outgoing, dev->hard_header==NULL
128    mac.raw -> data. ll header is still not built!
129    data    -> data
130 
131 Resume
132   If dev->hard_header==NULL we are unlikely to restore sensible ll header.
133 
134 
135 On transmit:
136 ------------
137 
138 dev->hard_header != NULL
139    mac.raw -> ll header
140    data    -> ll header
141 
142 dev->hard_header == NULL (ll header is added by device, we cannot control it)
143    mac.raw -> data
144    data -> data
145 
146    We should set nh.raw on output to the correct position;
147    the packet classifier depends on it.
148  */
149 
150 /* List of all packet sockets, chained through sk->next. */
151 static struct sock * packet_sklist = NULL;
/* Guards packet_sklist; packet_create()/packet_release() take it with write_lock_bh(). */
152 static rwlock_t packet_sklist_lock = RW_LOCK_UNLOCKED;
153 
/* Number of packet sockets: incremented in packet_create(), decremented in packet_sock_destruct(). */
154 atomic_t packet_socks_nr;
155 
156 
157 /* Private packet socket structures. */
158 
159 #ifdef CONFIG_PACKET_MULTICAST
/*
 * Node of a singly linked list (see `next'); struct packet_opt holds a
 * pointer of this type in its `mclist' member.  The code that creates and
 * walks these entries is outside this part of the file.
 * NOTE(review): the per-field notes below are inferred from the field
 * names only -- confirm against the multicast add/drop code.
 */
160 struct packet_mclist
161 {
162         struct packet_mclist    *next;          /* next entry, NULL-terminated presumably */
163         int                     ifindex;        /* presumably the device the entry refers to */
164         int                     count;          /* presumably a reference count for the entry */
165         unsigned short          type;
166         unsigned short          alen;           /* presumably the number of valid bytes in addr[] */
167         unsigned char           addr[8];
168 };
169 #endif
170 #ifdef CONFIG_PACKET_MMAP
/* Forward declaration: packet_release() calls this with a zeroed request and closing == 1. */
171 static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing);
172 #endif
173 
/* Forward declaration: called from packet_release() when CONFIG_PACKET_MULTICAST is defined. */
174 static void packet_flush_mclist(struct sock *sk);
175 
/*
 * Private per-socket state of a packet socket.  It is reached through
 * sk->protinfo.af_packet and is allocated and zero-filled by
 * packet_create().
 */
176 struct packet_opt
177 {
178         struct packet_type      prot_hook;      /* hook passed to dev_add_pack()/dev_remove_pack() */
179         spinlock_t              bind_lock;      /* held by packet_do_bind() while the hook is changed */
180         char                    running;        /* prot_hook is attached*/
181         int                     ifindex;        /* bound device         */
182         struct tpacket_stats    stats;          /* tp_packets / tp_drops, updated in the receive hooks */
183 #ifdef CONFIG_PACKET_MULTICAST
184         struct packet_mclist    *mclist;        /* presumably the head of this socket's packet_mclist chain */
185 #endif
186 #ifdef CONFIG_PACKET_MMAP
187         atomic_t                mapped;
188         unsigned long           *pg_vec;        /* when non-NULL, packet_release() calls packet_set_ring() */
189         unsigned int            pg_vec_order;
190         unsigned int            pg_vec_pages;
191         unsigned int            pg_vec_len;
192 
193         struct tpacket_hdr      **iovec;        /* frame headers; tpacket_rcv() indexes it with `head' */
194         unsigned int            frame_size;     /* tpacket_rcv() keeps macoff + snaplen within this */
195         unsigned int            iovmax;         /* last index of iovec; `head' wraps to 0 after it */
196         unsigned int            head;           /* slot tpacket_rcv() will try to fill next */
197         int                     copy_thresh;    /* non-zero: oversized packets may also be queued as skbs */
198 #endif
199 };
200 
201 void packet_sock_destruct(struct sock *sk)
202 {
203         BUG_TRAP(atomic_read(&sk->rmem_alloc)==0);
204         BUG_TRAP(atomic_read(&sk->wmem_alloc)==0);
205 
206         if (!sk->dead) {
207                 printk("Attempt to release alive packet socket: %p\n", sk);
208                 return;
209         }
210 
211         if (sk->protinfo.destruct_hook)
212                 kfree(sk->protinfo.destruct_hook);
213         atomic_dec(&packet_socks_nr);
214 #ifdef PACKET_REFCNT_DEBUG
215         printk(KERN_DEBUG "PACKET socket %p is free, %d are alive\n", sk, atomic_read(&packet_socks_nr));
216 #endif
217         MOD_DEC_USE_COUNT;
218 }
219 
220 
221 extern struct proto_ops packet_ops;
222 
223 #ifdef CONFIG_SOCK_PACKET
224 extern struct proto_ops packet_ops_spkt;
225 
226 static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt)
227 {
228         struct sock *sk;
229         struct sockaddr_pkt *spkt;
230 
231         /*
232          *      When we registered the protocol we saved the socket in the data
233          *      field for just this event.
234          */
235 
236         sk = (struct sock *) pt->data;
237         
238         /*
239          *      Yank back the headers [hope the device set this
240          *      right or kerboom...]
241          *
242          *      Incoming packets have ll header pulled,
243          *      push it back.
244          *
245          *      For outgoing ones skb->data == skb->mac.raw
246          *      so that this procedure is noop.
247          */
248 
249         if (skb->pkt_type == PACKET_LOOPBACK)
250                 goto out;
251 
252         if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
253                 goto oom;
254 
255         spkt = (struct sockaddr_pkt*)skb->cb;
256 
257         skb_push(skb, skb->data-skb->mac.raw);
258 
259         /*
260          *      The SOCK_PACKET socket receives _all_ frames.
261          */
262 
263         spkt->spkt_family = dev->type;
264         strncpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
265         spkt->spkt_protocol = skb->protocol;
266 
267         /*
268          *      Charge the memory to the socket. This is done specifically
269          *      to prevent sockets using all the memory up.
270          */
271 
272         if (sock_queue_rcv_skb(sk,skb) == 0)
273                 return 0;
274 
275 out:
276         kfree_skb(skb);
277 oom:
278         return 0;
279 }
280 
281 
282 /*
283  *      Output a raw packet to a device layer. This bypasses all the other
284  *      protocol layers and you must therefore supply it with a complete frame
285  */
286  
/*
 * sendmsg handler for SOCK_PACKET sockets.
 *
 * msg->msg_name must point to a struct sockaddr_pkt naming the output
 * device.  len bytes are copied from msg->msg_iov into a new skb, which is
 * then passed to dev_queue_xmit().  scm is not used.
 *
 * Returns len on success, otherwise a negative errno:
 *   -ENOTCONN  no address was supplied
 *   -EINVAL    the address is shorter than struct sockaddr
 *   -ENODEV    dev_get_by_name() did not find the device
 *   -EMSGSIZE  len > dev->mtu + dev->hard_header_len
 *   -ENOBUFS   sock_wmalloc() returned NULL
 *   -ENETDOWN  the device does not have IFF_UP set
 *   or the error returned by memcpy_fromiovec().
 * The return value of dev_queue_xmit() is not examined.
 */
287 static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg, int len,
288                                struct scm_cookie *scm)
289 {
290         struct sock *sk = sock->sk;
291         struct sockaddr_pkt *saddr=(struct sockaddr_pkt *)msg->msg_name;
292         struct sk_buff *skb;
293         struct net_device *dev;
294         unsigned short proto=0;
295         int err;
296         
297         /*
298          *      Get and verify the address. 
299          */
300 
301         if (saddr)
302         {
303                 if (msg->msg_namelen < sizeof(struct sockaddr))
304                         return(-EINVAL);
                    /* The protocol field is only trusted when a full sockaddr_pkt was passed; otherwise proto stays 0. */
305                 if (msg->msg_namelen==sizeof(struct sockaddr_pkt))
306                         proto=saddr->spkt_protocol;
307         }
308         else
309                 return(-ENOTCONN);      /* SOCK_PACKET must be sent giving an address */
310 
311         /*
312          *      Find the device first to size check it 
313          */
314 
            /* Force NUL termination of the caller-supplied device name before the lookup. */
315         saddr->spkt_device[13] = 0;
316         dev = dev_get_by_name(saddr->spkt_device);
317         err = -ENODEV;
318         if (dev == NULL)
319                 goto out_unlock;
320         
321         /*
322          *      You may not queue a frame bigger than the mtu. This is the lowest level
323          *      raw protocol and you must do your own fragmentation at this level.
324          */
325          
326         err = -EMSGSIZE;
327         if(len>dev->mtu+dev->hard_header_len)
328                 goto out_unlock;
329 
330         err = -ENOBUFS;
331         skb = sock_wmalloc(sk, len+dev->hard_header_len+15, 0, GFP_KERNEL);
332 
333         /*
334          *      If the write buffer is full, then tough. At this level the user gets to
335          *      deal with the problem - do your own algorithmic backoffs. That's far
336          *      more flexible.
337          */
338          
339         if (skb == NULL) 
340                 goto out_unlock;
341 
342         /*
343          *      Fill it in 
344          */
345          
346         /* FIXME: Save some space for broken drivers that write a
347          * hard header at transmission time by themselves. PPP is the
348          * notable one here. This should really be fixed at the driver level.
349          */
350         skb_reserve(skb,(dev->hard_header_len+15)&~15);
351         skb->nh.raw = skb->data;
352 
353         /* Try to align data part correctly */
            /*
             * When the device exposes its ll header, move data/tail back by
             * hard_header_len so that the caller's frame (which starts with
             * the ll header) ends up with its payload at nh.raw.
             */
354         if (dev->hard_header) {
355                 skb->data -= dev->hard_header_len;
356                 skb->tail -= dev->hard_header_len;
357         }
358 
359         /* Returns -EFAULT on error */
360         err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
361         skb->protocol = proto;
362         skb->dev = dev;
363         skb->priority = sk->priority;
            /* The copy result is only tested here, after the skb fields have been filled in. */
364         if (err)
365                 goto out_free;
366 
367         err = -ENETDOWN;
368         if (!(dev->flags & IFF_UP))
369                 goto out_free;
370 
371         /*
372          *      Now send it
373          */
374 
375         dev_queue_xmit(skb);
            /* Drop the device reference taken by dev_get_by_name(). */
376         dev_put(dev);
377         return(len);
378 
379 out_free:
380         kfree_skb(skb);
381 out_unlock:
            /* dev is NULL here only when dev_get_by_name() failed. */
382         if (dev)
383                 dev_put(dev);
384         return err;
385 }
386 #endif
387 
388 /*
389    This function makes lazy skb cloning in hope that most of packets
390    are discarded by BPF.
391 
392    Note tricky part: we DO mangle shared skb! skb->data, skb->len
393    and skb->cb are mangled. It works because (and until) packets
394    falling here are owned by current CPU. Output packets are cloned
395    by dev_queue_xmit_nit(), input packets are processed by net_bh
396    sequentially, so that if we return the skb to its original state on exit,
397    we will not harm anyone.
398  */
399 
/*
 * Receive hook that packet_create() installs as prot_hook.func for packet
 * sockets other than SOCK_PACKET.
 *
 * skb: the buffer being delivered; the caller's reference is consumed on
 *      every path (either queued on the socket or released).
 * dev: the device the packet is associated with.
 * pt:  the hook; pt->data holds the owning struct sock.
 *
 * The data pointer is adjusted according to the socket type, the socket
 * filter is run when CONFIG_FILTER is defined, and the packet is queued on
 * sk->receive_queue with a struct sockaddr_ll describing it in skb->cb.
 * Always returns 0.
 */
400 static int packet_rcv(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt)
401 {
402         struct sock *sk;
403         struct sockaddr_ll *sll;
404         struct packet_opt *po;
            /* Original data pointer, kept so that a shared skb can be put back as it was. */
405         u8 * skb_head = skb->data;
406 #ifdef CONFIG_FILTER
407         unsigned snaplen;
408 #endif
409 
            /* PACKET_LOOPBACK buffers are discarded without touching the socket. */
410         if (skb->pkt_type == PACKET_LOOPBACK)
411                 goto drop;
412 
413         sk = (struct sock *) pt->data;
414         po = sk->protinfo.af_packet;
415 
416         skb->dev = dev;
417 
418         if (dev->hard_header) {
419                 /* The device has an explicit notion of ll header,
420                    exported to higher levels.
421 
422                    Otherwise, the device hides details of its frame
423                    structure, so that corresponding packet head
424                    never delivered to user.
425                  */
426                 if (sk->type != SOCK_DGRAM)
427                         skb_push(skb, skb->data - skb->mac.raw);
428                 else if (skb->pkt_type == PACKET_OUTGOING) {
429                         /* Special case: outgoing packets have ll header at head */
430                         skb_pull(skb, skb->nh.raw - skb->data);
431                 }
432         }
433 
434 #ifdef CONFIG_FILTER
435         snaplen = skb->len;
436 
437         if (sk->filter) {
438                 unsigned res = snaplen;
439                 struct sk_filter *filter;
440 
                    /*
                     * sk->filter is tested again under bh_lock_sock();
                     * presumably it can be detached concurrently, so the
                     * unlocked test above is only a shortcut.
                     */
441                 bh_lock_sock(sk);
442                 if ((filter = sk->filter) != NULL)
443                         res = sk_run_filter(skb, sk->filter->insns, sk->filter->len);
444                 bh_unlock_sock(sk);
445 
                    /* A zero result rejects the packet (not counted as a drop); otherwise it caps the snapshot length. */
446                 if (res == 0)
447                         goto drop_n_restore;
448                 if (snaplen > res)
449                         snaplen = res;
450         }
451 #endif /* CONFIG_FILTER */
452 
            /* Queueing this skb would reach the receive buffer limit: account it as a drop. */
453         if (atomic_read(&sk->rmem_alloc) + skb->truesize >= (unsigned)sk->rcvbuf)
454                 goto drop_n_acct;
455 
            /*
             * A shared skb must not be queued as is: clone it, undo our
             * data/len adjustment on the original, release our reference
             * to the original and continue with the clone.
             */
456         if (skb_shared(skb)) {
457                 struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
458                 if (nskb == NULL)
459                         goto drop_n_acct;
460 
461                 if (skb_head != skb->data) {
462                         skb->data = skb_head;
463                         skb->len = skb->tail - skb->data;
464                 }
465                 kfree_skb(skb);
466                 skb = nskb;
467         }
468 
            /* Build the source address in the skb control block. */
469         sll = (struct sockaddr_ll*)skb->cb;
470         sll->sll_family = AF_PACKET;
471         sll->sll_hatype = dev->type;
472         sll->sll_protocol = skb->protocol;
473         sll->sll_pkttype = skb->pkt_type;
474         sll->sll_ifindex = dev->ifindex;
475         sll->sll_halen = 0;
476 
477         if (dev->hard_header_parse)
478                 sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
479 
480 #ifdef CONFIG_FILTER
481         if (skb->len > snaplen)
482                 __skb_trim(skb, snaplen);
483 #endif
484 
485         skb_set_owner_r(skb, sk);
486         skb->dev = NULL;
            /* The packet counter is updated under the same lock as the queue insertion. */
487         spin_lock(&sk->receive_queue.lock);
488         po->stats.tp_packets++;
489         __skb_queue_tail(&sk->receive_queue, skb);
490         spin_unlock(&sk->receive_queue.lock);
491         sk->data_ready(sk,skb->len);
492         return 0;
493 
494 drop_n_acct:
495         spin_lock(&sk->receive_queue.lock);
496         po->stats.tp_drops++;
497         spin_unlock(&sk->receive_queue.lock);
498 
499 #ifdef CONFIG_FILTER
500 drop_n_restore:
501 #endif
            /* Put a shared skb back into the state in which it was handed to us. */
502         if (skb_head != skb->data && skb_shared(skb)) {
503                 skb->data = skb_head;
504                 skb->len = skb->tail - skb->data;
505         }
506 drop:
507         kfree_skb(skb);
508         return 0;
509 }
510 
511 #ifdef CONFIG_PACKET_MMAP
/*
 * Receive hook for sockets using the CONFIG_PACKET_MMAP ring.
 *
 * skb: the buffer being delivered; the caller's reference is released on
 *      every path (the packet is copied, not queued, unless a copy_skb is
 *      made below).
 * dev: the device the packet is associated with.
 * pt:  the hook; pt->data holds the owning struct sock.
 *
 * Copies up to snaplen bytes of the packet plus a tpacket_hdr and a
 * sockaddr_ll into the frame slot po->iovec[po->head], then publishes the
 * slot by writing h->tp_status.  When the slot is still in use the packet
 * is counted in tp_drops.  Always returns 0.
 */
512 static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt)
513 {
514         struct sock *sk;
515         struct packet_opt *po;
516         struct sockaddr_ll *sll;
517         struct tpacket_hdr *h;
            /* Original data pointer, kept so that a shared skb can be put back as it was. */
518         u8 * skb_head = skb->data;
519         unsigned snaplen;
            /* TP_STATUS_LOSING is cleared further down when no drops have been recorded. */
520         unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
521         unsigned short macoff, netoff;
522         struct sk_buff *copy_skb = NULL;
523 
524         if (skb->pkt_type == PACKET_LOOPBACK)
525                 goto drop;
526 
527         sk = (struct sock *) pt->data;
528         po = sk->protinfo.af_packet;
529 
            /* Same data pointer adjustment as in packet_rcv(). */
530         if (dev->hard_header) {
531                 if (sk->type != SOCK_DGRAM)
532                         skb_push(skb, skb->data - skb->mac.raw);
533                 else if (skb->pkt_type == PACKET_OUTGOING) {
534                         /* Special case: outgoing packets have ll header at head */
535                         skb_pull(skb, skb->nh.raw - skb->data);
536                 }
537         }
538 
539         snaplen = skb->len;
540 
541 #ifdef CONFIG_FILTER
542         if (sk->filter) {
543                 unsigned res = snaplen;
544                 struct sk_filter *filter;
545 
546                 bh_lock_sock(sk);
547                 if ((filter = sk->filter) != NULL)
548                         res = sk_run_filter(skb, sk->filter->insns, sk->filter->len);
549                 bh_unlock_sock(sk);
550 
                    /* A zero result rejects the packet; otherwise it caps the snapshot length. */
551                 if (res == 0)
552                         goto drop_n_restore;
553                 if (snaplen > res)
554                         snaplen = res;
555         }
556 #endif
557 
            /*
             * Offsets inside the frame: netoff is where the data starting
             * at skb->nh.raw lands, macoff where the copy of skb->data
             * starts.  At least 16 bytes are left for the ll header.
             */
558         if (sk->type == SOCK_DGRAM) {
559                 macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
560         } else {
561                 unsigned maclen = skb->nh.raw - skb->data;
562                 netoff = TPACKET_ALIGN(TPACKET_HDRLEN + (maclen < 16 ? 16 : maclen));
563                 macoff = netoff - maclen;
564         }
565 
            /*
             * The packet does not fit into one frame.  When copy_thresh is
             * set and the receive buffer has room, keep a reference/clone
             * of the whole packet to queue on receive_queue as well; in
             * either case clamp snaplen to what the frame can hold.
             */
566         if (macoff + snaplen > po->frame_size) {
567                 if (po->copy_thresh &&
568                     atomic_read(&sk->rmem_alloc) + skb->truesize < (unsigned)sk->rcvbuf) {
569                         if (skb_shared(skb)) {
570                                 copy_skb = skb_clone(skb, GFP_ATOMIC);
571                         } else {
                                    /* The same skb gets queued, so its adjusted data pointer must not be restored on exit. */
572                                 copy_skb = skb_get(skb);
573                                 skb_head = skb->data;
574                         }
575                         if (copy_skb)
576                                 skb_set_owner_r(copy_skb, sk);
577                 }
578                 snaplen = po->frame_size - macoff;
579                 if ((int)snaplen < 0)
580                         snaplen = 0;
581         }
582 
            /* Slot selection, counters and the optional skb queueing all happen under receive_queue.lock. */
583         spin_lock(&sk->receive_queue.lock);
584         h = po->iovec[po->head];
585 
            /* A non-zero status means the slot is still in use: treat the ring as full. */
586         if (h->tp_status)
587                 goto ring_is_full;
588         po->head = po->head != po->iovmax ? po->head+1 : 0;
589         po->stats.tp_packets++;
590         if (copy_skb) {
591                 status |= TP_STATUS_COPY;
592                 __skb_queue_tail(&sk->receive_queue, copy_skb);
593         }
594         if (!po->stats.tp_drops)
595                 status &= ~TP_STATUS_LOSING;
596         spin_unlock(&sk->receive_queue.lock);
597 
598         memcpy((u8*)h + macoff, skb->data, snaplen);
599 
600         h->tp_len = skb->len;
601         h->tp_snaplen = snaplen;
602         h->tp_mac = macoff;
603         h->tp_net = netoff;
604         h->tp_sec = skb->stamp.tv_sec;
605         h->tp_usec = skb->stamp.tv_usec;
606 
            /* The sockaddr_ll is stored right after the aligned tpacket_hdr. */
607         sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h)));
608         sll->sll_halen = 0;
609         if (dev->hard_header_parse)
610                 sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
611         sll->sll_family = AF_PACKET;
612         sll->sll_hatype = dev->type;
613         sll->sll_protocol = skb->protocol;
614         sll->sll_pkttype = skb->pkt_type;
615         sll->sll_ifindex = dev->ifindex;
616 
            /* The status word is written last, after the frame contents, followed by a memory barrier. */
617         h->tp_status = status;
618         mb();
619 
620         sk->data_ready(sk, 0);
621 
            /* The success path falls through: the data was copied, so our reference to skb is dropped. */
622 drop_n_restore:
623         if (skb_head != skb->data && skb_shared(skb)) {
624                 skb->data = skb_head;
625                 skb->len = skb->tail - skb->data;
626         }
627 drop:
628         kfree_skb(skb);
629         return 0;
630 
631 ring_is_full:
632         po->stats.tp_drops++;
633         spin_unlock(&sk->receive_queue.lock);
634 
635         sk->data_ready(sk, 0);
            /* The extra reference/clone taken above was never queued; release it. */
636         if (copy_skb)
637                 kfree_skb(copy_skb);
638         goto drop_n_restore;
639 }
640 
641 #endif
642 
643 
/*
 * sendmsg handler for SOCK_RAW / SOCK_DGRAM packet sockets.
 *
 * Without msg->msg_name the bound interface index and sk->num are used;
 * otherwise interface, protocol and destination address come from the
 * supplied struct sockaddr_ll.  len bytes are copied from msg->msg_iov
 * into a new skb, which is passed to dev_queue_xmit().  scm is not used.
 *
 * Returns len on success, otherwise a negative errno:
 *   -EINVAL    address shorter than struct sockaddr_ll, or (SOCK_DGRAM)
 *              dev->hard_header() returned a negative value
 *   -ENXIO     dev_get_by_index() did not find the device
 *   -EMSGSIZE  len > dev->mtu (+ dev->hard_header_len for SOCK_RAW)
 *   -ENETDOWN  the device does not have IFF_UP set
 *   or the error reported by sock_alloc_send_skb(), memcpy_fromiovec() or
 *   net_xmit_errno().
 */
644 static int packet_sendmsg(struct socket *sock, struct msghdr *msg, int len,
645                           struct scm_cookie *scm)
646 {
647         struct sock *sk = sock->sk;
648         struct sockaddr_ll *saddr=(struct sockaddr_ll *)msg->msg_name;
649         struct sk_buff *skb;
650         struct net_device *dev;
651         unsigned short proto;
652         unsigned char *addr;
653         int ifindex, err, reserve = 0;
654 
655         /*
656          *      Get and verify the address. 
657          */
658          
659         if (saddr == NULL) {
660                 ifindex = sk->protinfo.af_packet->ifindex;
661                 proto   = sk->num;
662                 addr    = NULL;
663         } else {
664                 err = -EINVAL;
665                 if (msg->msg_namelen < sizeof(struct sockaddr_ll))
666                         goto out;
667                 ifindex = saddr->sll_ifindex;
668                 proto   = saddr->sll_protocol;
669                 addr    = saddr->sll_addr;
670         }
671 
672 
673         dev = dev_get_by_index(ifindex);
674         err = -ENXIO;
675         if (dev == NULL)
676                 goto out_unlock;
            /* SOCK_RAW callers supply the ll header themselves, so it counts towards the size limit. */
677         if (sock->type == SOCK_RAW)
678                 reserve = dev->hard_header_len;
679 
680         err = -EMSGSIZE;
681         if (len > dev->mtu+reserve)
682                 goto out_unlock;
683 
            /* On failure sock_alloc_send_skb() stores the error code in err. */
684         skb = sock_alloc_send_skb(sk, len+dev->hard_header_len+15, 0, 
685                                 msg->msg_flags & MSG_DONTWAIT, &err);
686         if (skb==NULL)
687                 goto out_unlock;
688 
689         skb_reserve(skb, (dev->hard_header_len+15)&~15);
690         skb->nh.raw = skb->data;
691 
692         if (dev->hard_header) {
693                 int res;
694                 err = -EINVAL;
695                 res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len);
                    /*
                     * For anything but SOCK_DGRAM the header just built is
                     * discarded (tail/len reset) and the caller's data,
                     * which carries its own header, is written in its
                     * place; res is ignored in that case.
                     */
696                 if (sock->type != SOCK_DGRAM) {
697                         skb->tail = skb->data;
698                         skb->len = 0;
699                 } else if (res < 0)
700                         goto out_free;
701         }
702 
703         /* Returns -EFAULT on error */
704         err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
705         if (err)
706                 goto out_free;
707 
708         skb->protocol = proto;
709         skb->dev = dev;
710         skb->priority = sk->priority;
711 
712         err = -ENETDOWN;
713         if (!(dev->flags & IFF_UP))
714                 goto out_free;
715 
716         /*
717          *      Now send it
718          */
719 
            /*
             * The skb is not freed on the error path below; presumably
             * dev_queue_xmit() has already consumed it -- confirm.
             */
720         err = dev_queue_xmit(skb);
721         if (err > 0 && (err = net_xmit_errno(err)) != 0)
722                 goto out_unlock;
723 
724         dev_put(dev);
725 
726         return(len);
727 
728 out_free:
729         kfree_skb(skb);
730 out_unlock:
            /* dev is NULL here only when dev_get_by_index() failed. */
731         if (dev)
732                 dev_put(dev);
733 out:
734         return err;
735 }
736 
737 /*
738  *      Close a PACKET socket. This is fairly simple. We immediately go
739  *      to 'closed' state and remove our protocol entry in the device list.
740  */
741 
742 static int packet_release(struct socket *sock)
743 {
744         struct sock *sk = sock->sk;
745         struct sock **skp;
746 
747         if (!sk)
748                 return 0;
749 
750         write_lock_bh(&packet_sklist_lock);
751         for (skp = &packet_sklist; *skp; skp = &(*skp)->next) {
752                 if (*skp == sk) {
753                         *skp = sk->next;
754                         __sock_put(sk);
755                         break;
756                 }
757         }
758         write_unlock_bh(&packet_sklist_lock);
759 
760         /*
761          *      Unhook packet receive handler.
762          */
763 
764         if (sk->protinfo.af_packet->running) {
765                 /*
766                  *      Remove the protocol hook
767                  */
768                 dev_remove_pack(&sk->protinfo.af_packet->prot_hook);
769                 sk->protinfo.af_packet->running = 0;
770                 __sock_put(sk);
771         }
772 
773 #ifdef CONFIG_PACKET_MULTICAST
774         packet_flush_mclist(sk);
775 #endif
776 
777 #ifdef CONFIG_PACKET_MMAP
778         if (sk->protinfo.af_packet->pg_vec) {
779                 struct tpacket_req req;
780                 memset(&req, 0, sizeof(req));
781                 packet_set_ring(sk, &req, 1);
782         }
783 #endif
784 
785         /*
786          *      Now the socket is dead. No more input will appear.
787          */
788 
789         sock_orphan(sk);
790         sock->sk = NULL;
791 
792         /* Purge queues */
793 
794         skb_queue_purge(&sk->receive_queue);
795 
796         sock_put(sk);
797         return 0;
798 }
799 
800 /*
801  *      Attach a packet hook.
802  */
803 
804 static int packet_do_bind(struct sock *sk, struct net_device *dev, int protocol)
805 {
806         /*
807          *      Detach an existing hook if present.
808          */
809 
810         lock_sock(sk);
811 
812         spin_lock(&sk->protinfo.af_packet->bind_lock);
813         if (sk->protinfo.af_packet->running) {
814                 dev_remove_pack(&sk->protinfo.af_packet->prot_hook);
815                 __sock_put(sk);
816                 sk->protinfo.af_packet->running = 0;
817         }
818 
819         sk->num = protocol;
820         sk->protinfo.af_packet->prot_hook.type = protocol;
821         sk->protinfo.af_packet->prot_hook.dev = dev;
822 
823         sk->protinfo.af_packet->ifindex = dev ? dev->ifindex : 0;
824 
825         if (protocol == 0)
826                 goto out_unlock;
827 
828         if (dev) {
829                 if (dev->flags&IFF_UP) {
830                         dev_add_pack(&sk->protinfo.af_packet->prot_hook);
831                         sock_hold(sk);
832                         sk->protinfo.af_packet->running = 1;
833                 } else {
834                         sk->err = ENETDOWN;
835                         if (!sk->dead)
836                                 sk->error_report(sk);
837                 }
838         } else {
839                 dev_add_pack(&sk->protinfo.af_packet->prot_hook);
840                 sock_hold(sk);
841                 sk->protinfo.af_packet->running = 1;
842         }
843 
844 out_unlock:
845         spin_unlock(&sk->protinfo.af_packet->bind_lock);
846         release_sock(sk);
847         return 0;
848 }
849 
850 /*
851  *      Bind a packet socket to a device
852  */
853 
854 #ifdef CONFIG_SOCK_PACKET
855 
856 static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len)
857 {
858         struct sock *sk=sock->sk;
859         char name[15];
860         struct net_device *dev;
861         int err = -ENODEV;
862         
863         /*
864          *      Check legality
865          */
866          
867         if(addr_len!=sizeof(struct sockaddr))
868                 return -EINVAL;
869         strncpy(name,uaddr->sa_data,14);
870         name[14]=0;
871 
872         dev = dev_get_by_name(name);
873         if (dev) {
874                 err = packet_do_bind(sk, dev, sk->num);
875                 dev_put(dev);
876         }
877         return err;
878 }
879 #endif
880 
881 static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
882 {
883         struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
884         struct sock *sk=sock->sk;
885         struct net_device *dev = NULL;
886         int err;
887 
888 
889         /*
890          *      Check legality
891          */
892          
893         if (addr_len < sizeof(struct sockaddr_ll))
894                 return -EINVAL;
895         if (sll->sll_family != AF_PACKET)
896                 return -EINVAL;
897 
898         if (sll->sll_ifindex) {
899                 err = -ENODEV;
900                 dev = dev_get_by_index(sll->sll_ifindex);
901                 if (dev == NULL)
902                         goto out;
903         }
904         err = packet_do_bind(sk, dev, sll->sll_protocol ? : sk->num);
905         if (dev)
906                 dev_put(dev);
907 
908 out:
909         return err;
910 }
911 
912 
913 /*
914  *      Create a packet of type SOCK_PACKET. 
915  */
916 
917 static int packet_create(struct socket *sock, int protocol)
918 {
919         struct sock *sk;
920         int err;
921 
922         if (!capable(CAP_NET_RAW))
923                 return -EPERM;
924         if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW
925 #ifdef CONFIG_SOCK_PACKET
926             && sock->type != SOCK_PACKET
927 #endif
928             )
929                 return -ESOCKTNOSUPPORT;
930 
931         sock->state = SS_UNCONNECTED;
932         MOD_INC_USE_COUNT;
933 
934         err = -ENOBUFS;
935         sk = sk_alloc(PF_PACKET, GFP_KERNEL, 1);
936         if (sk == NULL)
937                 goto out;
938 
939         sock->ops = &packet_ops;
940 #ifdef CONFIG_SOCK_PACKET
941         if (sock->type == SOCK_PACKET)
942                 sock->ops = &packet_ops_spkt;
943 #endif
944         sock_init_data(sock,sk);
945 
946         sk->protinfo.af_packet = kmalloc(sizeof(struct packet_opt), GFP_KERNEL);
947         if (sk->protinfo.af_packet == NULL)
948                 goto out_free;
949         memset(sk->protinfo.af_packet, 0, sizeof(struct packet_opt));
950         sk->family = PF_PACKET;
951         sk->num = protocol;
952 
953         sk->destruct = packet_sock_destruct;
954         atomic_inc(&packet_socks_nr);
955 
956         /*
957          *      Attach a protocol block
958          */
959 
960         spin_lock_init(&sk->protinfo.af_packet->bind_lock);
961         sk->protinfo.af_packet->prot_hook.func = packet_rcv;
962 #ifdef CONFIG_SOCK_PACKET
963         if (sock->type == SOCK_PACKET)
964                 sk->protinfo.af_packet->prot_hook.func = packet_rcv_spkt;
965 #endif
966         sk->protinfo.af_packet->prot_hook.data = (void *)sk;
967 
968         if (protocol) {
969                 sk->protinfo.af_packet->prot_hook.type = protocol;
970                 dev_add_pack(&sk->protinfo.af_packet->prot_hook);
971                 sock_hold(sk);
972                 sk->protinfo.af_packet->running = 1;
973         }
974 
975         write_lock_bh(&packet_sklist_lock);
976         sk->next = packet_sklist;
977         packet_sklist = sk;
978         sock_hold(sk);
979         write_unlock_bh(&packet_sklist_lock);
980         return(0);
981 
982 out_free:
983         sk_free(sk);
984 out:
985         MOD_DEC_USE_COUNT;
986         return err;
987 }
988 
989 /*
990  *      Pull a packet from our receive queue and hand it to the user.
991  *      If necessary we block.
992  */
993 
994 static int packet_recvmsg(struct socket *sock, struct msghdr *msg, int len,
995                           int flags, struct scm_cookie *scm)
996 {
997         struct sock *sk = sock->sk;
998         struct sk_buff *skb;
999         int copied, err;
1000 
1001         err = -EINVAL;
1002         if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC))
1003                 goto out;
1004 
1005 #if 0
1006         /* What error should we return now? EUNATTACH? */
1007         if (sk->protinfo.af_packet->ifindex < 0)
1008                 return -ENODEV;
1009 #endif
1010 
1011         /*
1012          *      If the address length field is there to be filled in, we fill
1013          *      it in now.
1014          */
1015 
1016         if (sock->type == SOCK_PACKET)
1017                 msg->msg_namelen = sizeof(struct sockaddr_pkt);
1018         else
1019                 msg->msg_namelen = sizeof(struct sockaddr_ll);
1020 
1021         /*
1022          *      Call the generic datagram receiver. This handles all sorts
1023          *      of horrible races and re-entrancy so we can forget about it
1024          *      in the protocol layers.
1025          *
1026          *      Now it will return ENETDOWN, if device have just gone down,
1027          *      but then it will block.
1028          */
1029 
1030         skb=skb_recv_datagram(sk,flags,flags&MSG_DONTWAIT,&err);
1031 
1032         /*
1033          *      An error occurred so return it. Because skb_recv_datagram() 
1034          *      handles the blocking we don't see and worry about blocking
1035          *      retries.
1036          */
1037 
1038         if(skb==NULL)
1039                 goto out;
1040 
1041         /*
1042          *      You lose any data beyond the buffer you gave. If it worries a
1043          *      user program they can ask the device for its MTU anyway.
1044          */
1045 
1046         copied = skb->len;
1047         if (copied > len)
1048         {
1049                 copied=len;
1050                 msg->msg_flags|=MSG_TRUNC;
1051         }
1052 
1053         /* We can't use skb_copy_datagram here */
1054         err = memcpy_toiovec(msg->msg_iov, skb->data, copied);
1055         if (err)
1056                 goto out_free;
1057 
1058         sock_recv_timestamp(msg, sk, skb);
1059 
1060         if (msg->msg_name)
1061                 memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
1062 
1063         /*
1064          *      Free or return the buffer as appropriate. Again this
1065          *      hides all the races and re-entrancy issues from us.
1066          */
1067         err = (flags&MSG_TRUNC) ? skb->len : copied;
1068 
1069 out_free:
1070         skb_free_datagram(sk, skb);
1071 out:
1072         return err;
1073 }
1074 
1075 #ifdef CONFIG_SOCK_PACKET
1076 static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
1077                                int *uaddr_len, int peer)
1078 {
1079         struct net_device *dev;
1080         struct sock *sk = sock->sk;
1081 
1082         if (peer)
1083                 return -EOPNOTSUPP;
1084 
1085         uaddr->sa_family = AF_PACKET;
1086         dev = dev_get_by_index(sk->protinfo.af_packet->ifindex);
1087         if (dev) {
1088                 strncpy(uaddr->sa_data, dev->name, 15);
1089                 dev_put(dev);
1090         } else
1091                 memset(uaddr->sa_data, 0, 14);
1092         *uaddr_len = sizeof(*uaddr);
1093 
1094         return 0;
1095 }
1096 #endif
1097 
1098 static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
1099                           int *uaddr_len, int peer)
1100 {
1101         struct net_device *dev;
1102         struct sock *sk = sock->sk;
1103         struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
1104 
1105         if (peer)
1106                 return -EOPNOTSUPP;
1107 
1108         sll->sll_family = AF_PACKET;
1109         sll->sll_ifindex = sk->protinfo.af_packet->ifindex;
1110         sll->sll_protocol = sk->num;
1111         dev = dev_get_by_index(sk->protinfo.af_packet->ifindex);
1112         if (dev) {
1113                 sll->sll_hatype = dev->type;
1114                 sll->sll_halen = dev->addr_len;
1115                 memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
1116                 dev_put(dev);
1117         } else {
1118                 sll->sll_hatype = 0;    /* Bad: we have no ARPHRD_UNSPEC */
1119                 sll->sll_halen = 0;
1120         }
1121         *uaddr_len = sizeof(*sll);
1122 
1123         return 0;
1124 }
1125 
1126 #ifdef CONFIG_PACKET_MULTICAST
1127 static void packet_dev_mc(struct net_device *dev, struct packet_mclist *i, int what)
1128 {
1129         switch (i->type) {
1130         case PACKET_MR_MULTICAST:
1131                 if (what > 0)
1132                         dev_mc_add(dev, i->addr, i->alen, 0);
1133                 else
1134                         dev_mc_delete(dev, i->addr, i->alen, 0);
1135                 break;
1136         case PACKET_MR_PROMISC:
1137                 dev_set_promiscuity(dev, what);
1138                 break;
1139         case PACKET_MR_ALLMULTI:
1140                 dev_set_allmulti(dev, what);
1141                 break;
1142         default:;
1143         }
1144 }
1145 
1146 static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
1147 {
1148         for ( ; i; i=i->next) {
1149                 if (i->ifindex == dev->ifindex)
1150                         packet_dev_mc(dev, i, what);
1151         }
1152 }
1153 
1154 static int packet_mc_add(struct sock *sk, struct packet_mreq *mreq)
1155 {
1156         struct packet_mclist *ml, *i;
1157         struct net_device *dev;
1158         int err;
1159 
1160         rtnl_lock();
1161 
1162         err = -ENODEV;
1163         dev = __dev_get_by_index(mreq->mr_ifindex);
1164         if (!dev)
1165                 goto done;
1166 
1167         err = -EINVAL;
1168         if (mreq->mr_alen > dev->addr_len)
1169                 goto done;
1170 
1171         err = -ENOBUFS;
1172         i = (struct packet_mclist *)kmalloc(sizeof(*i), GFP_KERNEL);
1173         if (i == NULL)
1174                 goto done;
1175 
1176         err = 0;
1177         for (ml=sk->protinfo.af_packet->mclist; ml; ml=ml->next) {
1178                 if (ml->ifindex == mreq->mr_ifindex &&
1179                     ml->type == mreq->mr_type &&
1180                     ml->alen == mreq->mr_alen &&
1181                     memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1182                         ml->count++;
1183                         /* Free the new element ... */
1184                         kfree(i);
1185                         goto done;
1186                 }
1187         }
1188 
1189         i->type = mreq->mr_type;
1190         i->ifindex = mreq->mr_ifindex;
1191         i->alen = mreq->mr_alen;
1192         memcpy(i->addr, mreq->mr_address, i->alen);
1193         i->count = 1;
1194         i->next = sk->protinfo.af_packet->mclist;
1195         sk->protinfo.af_packet->mclist = i;
1196         packet_dev_mc(dev, i, +1);
1197 
1198 done:
1199         rtnl_unlock();
1200         return err;
1201 }
1202 
1203 static int packet_mc_drop(struct sock *sk, struct packet_mreq *mreq)
1204 {
1205         struct packet_mclist *ml, **mlp;
1206 
1207         rtnl_lock();
1208 
1209         for (mlp=&sk->protinfo.af_packet->mclist; (ml=*mlp)!=NULL; mlp=&ml->next) {
1210                 if (ml->ifindex == mreq->mr_ifindex &&
1211                     ml->type == mreq->mr_type &&
1212                     ml->alen == mreq->mr_alen &&
1213                     memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1214                         if (--ml->count == 0) {
1215                                 struct net_device *dev;
1216                                 *mlp = ml->next;
1217                                 dev = dev_get_by_index(ml->ifindex);
1218                                 if (dev) {
1219                                         packet_dev_mc(dev, ml, -1);
1220                                         dev_put(dev);
1221                                 }
1222                                 kfree(ml);
1223                         }
1224                         rtnl_unlock();
1225                         return 0;
1226                 }
1227         }
1228         rtnl_unlock();
1229         return -EADDRNOTAVAIL;
1230 }
1231 
1232 static void packet_flush_mclist(struct sock *sk)
1233 {
1234         struct packet_mclist *ml;
1235 
1236         if (sk->protinfo.af_packet->mclist == NULL)
1237                 return;
1238 
1239         rtnl_lock();
1240         while ((ml=sk->protinfo.af_packet->mclist) != NULL) {
1241                 struct net_device *dev;
1242                 sk->protinfo.af_packet->mclist = ml->next;
1243                 if ((dev = dev_get_by_index(ml->ifindex)) != NULL) {
1244                         packet_dev_mc(dev, ml, -1);
1245                         dev_put(dev);
1246                 }
1247                 kfree(ml);
1248         }
1249         rtnl_unlock();
1250 }
1251 #endif
1252 
1253 static int
1254 packet_setsockopt(struct socket *sock, int level, int optname, char *optval, int optlen)
1255 {
1256         struct sock *sk = sock->sk;
1257         int ret;
1258 
1259         if (level != SOL_PACKET)
1260                 return -ENOPROTOOPT;
1261 
1262         switch(optname) {
1263 #ifdef CONFIG_PACKET_MULTICAST
1264         case PACKET_ADD_MEMBERSHIP:     
1265         case PACKET_DROP_MEMBERSHIP:
1266         {
1267                 struct packet_mreq mreq;
1268                 if (optlen<sizeof(mreq))
1269                         return -EINVAL;
1270                 if (copy_from_user(&mreq,optval,sizeof(mreq)))
1271                         return -EFAULT;
1272                 if (optname == PACKET_ADD_MEMBERSHIP)
1273                         ret = packet_mc_add(sk, &mreq);
1274                 else
1275                         ret = packet_mc_drop(sk, &mreq);
1276                 return ret;
1277         }
1278 #endif
1279 #ifdef CONFIG_PACKET_MMAP
1280         case PACKET_RX_RING:
1281         {
1282                 struct tpacket_req req;
1283 
1284                 if (optlen<sizeof(req))
1285                         return -EINVAL;
1286                 if (copy_from_user(&req,optval,sizeof(req)))
1287                         return -EFAULT;
1288                 return packet_set_ring(sk, &req, 0);
1289         }
1290         case PACKET_COPY_THRESH:
1291         {
1292                 int val;
1293 
1294                 if (optlen!=sizeof(val))
1295                         return -EINVAL;
1296                 if (copy_from_user(&val,optval,sizeof(val)))
1297                         return -EFAULT;
1298 
1299                 sk->protinfo.af_packet->copy_thresh = val;
1300                 return 0;
1301         }
1302 #endif
1303         default:
1304                 return -ENOPROTOOPT;
1305         }
1306 }
1307 
1308 int packet_getsockopt(struct socket *sock, int level, int optname,
1309                       char *optval, int *optlen)
1310 {
1311         int len;
1312         struct sock *sk = sock->sk;
1313 
1314         if (level != SOL_PACKET)
1315                 return -ENOPROTOOPT;
1316 
1317         if (get_user(len,optlen))
1318                 return -EFAULT;
1319 
1320         switch(optname) {
1321         case PACKET_STATISTICS:
1322         {
1323                 struct tpacket_stats st;
1324 
1325                 if (len > sizeof(struct tpacket_stats))
1326                         len = sizeof(struct tpacket_stats);
1327                 spin_lock_bh(&sk->receive_queue.lock);
1328                 st = sk->protinfo.af_packet->stats;
1329                 memset(&sk->protinfo.af_packet->stats, 0, sizeof(st));
1330                 spin_unlock_bh(&sk->receive_queue.lock);
1331                 st.tp_packets += st.tp_drops;
1332 
1333                 if (copy_to_user(optval, &st, len))
1334                         return -EFAULT;
1335                 break;
1336         }
1337         default:
1338                 return -ENOPROTOOPT;
1339         }
1340 
1341         if (put_user(len, optlen))
1342                 return -EFAULT;
1343         return 0;
1344 }
1345 
1346 
1347 static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
1348 {
1349         struct sock *sk;
1350         struct packet_opt *po;
1351         struct net_device *dev = (struct net_device*)data;
1352 
1353         read_lock(&packet_sklist_lock);
1354         for (sk = packet_sklist; sk; sk = sk->next) {
1355                 po = sk->protinfo.af_packet;
1356 
1357                 switch (msg) {
1358                 case NETDEV_DOWN:
1359                 case NETDEV_UNREGISTER:
1360                         if (dev->ifindex == po->ifindex) {
1361                                 spin_lock(&po->bind_lock);
1362                                 if (po->running) {
1363                                         dev_remove_pack(&po->prot_hook);
1364                                         __sock_put(sk);
1365                                         po->running = 0;
1366                                         sk->err = ENETDOWN;
1367                                         if (!sk->dead)
1368                                                 sk->error_report(sk);
1369                                 }
1370                                 if (msg == NETDEV_UNREGISTER) {
1371                                         po->ifindex = -1;
1372                                         po->prot_hook.dev = NULL;
1373                                 }
1374                                 spin_unlock(&po->bind_lock);
1375                         }
1376 #ifdef CONFIG_PACKET_MULTICAST
1377                         if (po->mclist)
1378                                 packet_dev_mclist(dev, po->mclist, -1);
1379 #endif
1380                         break;
1381                 case NETDEV_UP:
1382                         spin_lock(&po->bind_lock);
1383                         if (dev->ifindex == po->ifindex && sk->num && po->running==0) {
1384                                 dev_add_pack(&po->prot_hook);
1385                                 sock_hold(sk);
1386                                 po->running = 1;
1387                         }
1388                         spin_unlock(&po->bind_lock);
1389 #ifdef CONFIG_PACKET_MULTICAST
1390                         if (po->mclist)
1391                                 packet_dev_mclist(dev, po->mclist, +1);
1392 #endif
1393                         break;
1394                 }
1395         }
1396         read_unlock(&packet_sklist_lock);
1397         return NOTIFY_DONE;
1398 }
1399 
1400 
1401 static int packet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1402 {
1403         struct sock *sk = sock->sk;
1404         int err;
1405         int pid;
1406 
1407         switch(cmd) 
1408         {
1409                 case SIOCOUTQ:
1410                 {
1411                         int amount = atomic_read(&sk->wmem_alloc);
1412                         return put_user(amount, (int *)arg);
1413                 }
1414                 case SIOCINQ:
1415                 {
1416                         struct sk_buff *skb;
1417                         int amount = 0;
1418 
1419                         spin_lock_bh(&sk->receive_queue.lock);
1420                         skb = skb_peek(&sk->receive_queue);
1421                         if (skb)
1422                                 amount = skb->len;
1423                         spin_unlock_bh(&sk->receive_queue.lock);
1424                         return put_user(amount, (int *)arg);
1425                 }
1426                 case FIOSETOWN:
1427                 case SIOCSPGRP:
1428                         err = get_user(pid, (int *) arg);
1429                         if (err)
1430                                 return err; 
1431                         if (current->pid != pid && current->pgrp != -pid && 
1432                             !capable(CAP_NET_ADMIN))
1433                                 return -EPERM;
1434                         sk->proc = pid;
1435                         return(0);
1436                 case FIOGETOWN:
1437                 case SIOCGPGRP:
1438                         return put_user(sk->proc, (int *)arg);
1439                 case SIOCGSTAMP:
1440                         if(sk->stamp.tv_sec==0)
1441                                 return -ENOENT;
1442                         err = -EFAULT;
1443                         if (!copy_to_user((void *)arg, &sk->stamp, sizeof(struct timeval)))
1444                                 err = 0;
1445                         return err;
1446                 case SIOCGIFFLAGS:
1447 #ifndef CONFIG_INET
1448                 case SIOCSIFFLAGS:
1449 #endif
1450                 case SIOCGIFCONF:
1451                 case SIOCGIFMETRIC:
1452                 case SIOCSIFMETRIC:
1453                 case SIOCGIFMEM:
1454                 case SIOCSIFMEM:
1455                 case SIOCGIFMTU:
1456                 case SIOCSIFMTU:
1457                 case SIOCSIFLINK:
1458                 case SIOCGIFHWADDR:
1459                 case SIOCSIFHWADDR:
1460                 case SIOCSIFMAP:
1461                 case SIOCGIFMAP:
1462                 case SIOCSIFSLAVE:
1463                 case SIOCGIFSLAVE:
1464                 case SIOCGIFINDEX:
1465                 case SIOCGIFNAME:
1466                 case SIOCGIFCOUNT:
1467                 case SIOCSIFHWBROADCAST:
1468                         return(dev_ioctl(cmd,(void *) arg));
1469 
1470                 case SIOCGIFBR:
1471                 case SIOCSIFBR:
1472 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
1473 #ifdef CONFIG_INET
1474 #ifdef CONFIG_KMOD
1475                         if (br_ioctl_hook == NULL)
1476                                 request_module("bridge");
1477 #endif
1478                         if (br_ioctl_hook != NULL)
1479                                 return br_ioctl_hook(arg);
1480 #endif
1481 #endif                          
1482 
1483                 case SIOCGIFDIVERT:
1484                 case SIOCSIFDIVERT:
1485 #ifdef CONFIG_NET_DIVERT
1486                         return(divert_ioctl(cmd, (struct divert_cf *) arg));
1487 #else
1488                         return -ENOPKG;
1489 #endif /* CONFIG_NET_DIVERT */
1490 
1491                         return -ENOPKG;
1492                         
1493 #ifdef CONFIG_INET
1494                 case SIOCADDRT:
1495                 case SIOCDELRT:
1496                 case SIOCDARP:
1497                 case SIOCGARP:
1498                 case SIOCSARP:
1499                 case SIOCGIFADDR:
1500                 case SIOCSIFADDR:
1501                 case SIOCGIFBRDADDR:
1502                 case SIOCSIFBRDADDR:
1503                 case SIOCGIFNETMASK:
1504                 case SIOCSIFNETMASK:
1505                 case SIOCGIFDSTADDR:
1506                 case SIOCSIFDSTADDR:
1507                 case SIOCSIFFLAGS:
1508                 case SIOCADDDLCI:
1509                 case SIOCDELDLCI:
1510                         return inet_dgram_ops.ioctl(sock, cmd, arg);
1511 #endif
1512 
1513                 default:
1514                         if ((cmd >= SIOCDEVPRIVATE) &&
1515                             (cmd <= (SIOCDEVPRIVATE + 15)))
1516                                 return(dev_ioctl(cmd,(void *) arg));
1517 
1518 #ifdef CONFIG_NET_RADIO
1519                         if((cmd >= SIOCIWFIRST) && (cmd <= SIOCIWLAST))
1520                                 return(dev_ioctl(cmd,(void *) arg));
1521 #endif
1522                         return -EOPNOTSUPP;
1523         }
1524         /*NOTREACHED*/
1525         return(0);
1526 }
1527 
1528 #ifndef CONFIG_PACKET_MMAP
1529 #define packet_mmap sock_no_mmap
1530 #define packet_poll datagram_poll
1531 #else
1532 
1533 unsigned int packet_poll(struct file * file, struct socket *sock, poll_table *wait)
1534 {
1535         struct sock *sk = sock->sk;
1536         struct packet_opt *po = sk->protinfo.af_packet;
1537         unsigned int mask = datagram_poll(file, sock, wait);
1538 
1539         spin_lock_bh(&sk->receive_queue.lock);
1540         if (po->iovec) {
1541                 unsigned last = po->head ? po->head-1 : po->iovmax;
1542 
1543                 if (po->iovec[last]->tp_status)
1544                         mask |= POLLIN | POLLRDNORM;
1545         }
1546         spin_unlock_bh(&sk->receive_queue.lock);
1547         return mask;
1548 }
1549 
1550 
1551 /* Dirty? Well, I still did not learn better way to account
1552  * for user mmaps.
1553  */
1554 
1555 static void packet_mm_open(struct vm_area_struct *vma)
1556 {
1557         struct file *file = vma->vm_file;
1558         struct inode *inode = file->f_dentry->d_inode;
1559         struct socket * sock = &inode->u.socket_i;
1560         struct sock *sk = sock->sk;
1561         
1562         if (sk)
1563                 atomic_inc(&sk->protinfo.af_packet->mapped);
1564 }
1565 
1566 static void packet_mm_close(struct vm_area_struct *vma)
1567 {
1568         struct file *file = vma->vm_file;
1569         struct inode *inode = file->f_dentry->d_inode;
1570         struct socket * sock = &inode->u.socket_i;
1571         struct sock *sk = sock->sk;
1572         
1573         if (sk)
1574                 atomic_dec(&sk->protinfo.af_packet->mapped);
1575 }
1576 
1577 static struct vm_operations_struct packet_mmap_ops = {
1578         open:   packet_mm_open,
1579         close:  packet_mm_close,
1580 };
1581 
1582 static void free_pg_vec(unsigned long *pg_vec, unsigned order, unsigned len)
1583 {
1584         int i;
1585 
1586         for (i=0; i<len; i++) {
1587                 if (pg_vec[i]) {
1588                         struct page *page, *pend;
1589 
1590                         pend = virt_to_page(pg_vec[i] + (PAGE_SIZE << order) - 1);
1591                         for (page = virt_to_page(pg_vec[i]); page <= pend; page++)
1592                                 ClearPageReserved(page);
1593                         free_pages(pg_vec[i], order);
1594                 }
1595         }
1596         kfree(pg_vec);
1597 }
1598 
1599 
1600 static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing)
1601 {
1602         unsigned long *pg_vec = NULL;
1603         struct tpacket_hdr **io_vec = NULL;
1604         struct packet_opt *po = sk->protinfo.af_packet;
1605         int order = 0;
1606         int err = 0;
1607 
1608         if (req->tp_block_nr) {
1609                 int i, l;
1610                 int frames_per_block;
1611 
1612                 /* Sanity tests and some calculations */
1613                 if ((int)req->tp_block_size <= 0)
1614                         return -EINVAL;
1615                 if (req->tp_block_size&(PAGE_SIZE-1))
1616                         return -EINVAL;
1617                 if (req->tp_frame_size < TPACKET_HDRLEN)
1618                         return -EINVAL;
1619                 if (req->tp_frame_size&(TPACKET_ALIGNMENT-1))
1620                         return -EINVAL;
1621                 frames_per_block = req->tp_block_size/req->tp_frame_size;
1622                 if (frames_per_block <= 0)
1623                         return -EINVAL;
1624                 if (frames_per_block*req->tp_block_nr != req->tp_frame_nr)
1625                         return -EINVAL;
1626                 /* OK! */
1627 
1628                 /* Allocate page vector */
1629                 while ((PAGE_SIZE<<order) < req->tp_block_size)
1630                         order++;
1631 
1632                 err = -ENOMEM;
1633 
1634                 pg_vec = kmalloc(req->tp_block_nr*sizeof(unsigned long*), GFP_KERNEL);
1635                 if (pg_vec == NULL)
1636                         goto out;
1637                 memset(pg_vec, 0, req->tp_block_nr*sizeof(unsigned long*));
1638 
1639                 for (i=0; i<req->tp_block_nr; i++) {
1640                         struct page *page, *pend;
1641                         pg_vec[i] = __get_free_pages(GFP_KERNEL, order);
1642                         if (!pg_vec[i])
1643                                 goto out_free_pgvec;
1644 
1645                         pend = virt_to_page(pg_vec[i] + (PAGE_SIZE << order) - 1);
1646                         for (page = virt_to_page(pg_vec[i]); page <= pend; page++)
1647                                 SetPageReserved(page);
1648                 }
1649                 /* Page vector is allocated */
1650 
1651                 /* Draw frames */
1652                 io_vec = kmalloc(req->tp_frame_nr*sizeof(struct tpacket_hdr*), GFP_KERNEL);
1653                 if (io_vec == NULL)
1654                         goto out_free_pgvec;
1655                 memset(io_vec, 0, req->tp_frame_nr*sizeof(struct tpacket_hdr*));
1656 
1657                 l = 0;
1658                 for (i=0; i<req->tp_block_nr; i++) {
1659                         unsigned long ptr = pg_vec[i];
1660                         int k;
1661 
1662                         for (k=0; k<frames_per_block; k++, l++) {
1663                                 io_vec[l] = (struct tpacket_hdr*)ptr;
1664                                 io_vec[l]->tp_status = TP_STATUS_KERNEL;
1665                                 ptr += req->tp_frame_size;
1666                         }
1667                 }
1668                 /* Done */
1669         } else {
1670                 if (req->tp_frame_nr)
1671                         return -EINVAL;
1672         }
1673 
1674         lock_sock(sk);
1675 
1676         /* Detach socket from network */
1677         spin_lock(&po->bind_lock);
1678         if (po->running)
1679                 dev_remove_pack(&po->prot_hook);
1680         spin_unlock(&po->bind_lock);
1681 
1682         err = -EBUSY;
1683         if (closing || atomic_read(&po->mapped) == 0) {
1684                 err = 0;
1685 #define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })
1686 
1687                 spin_lock_bh(&sk->receive_queue.lock);
1688                 pg_vec = XC(po->pg_vec, pg_vec);
1689                 io_vec = XC(po->iovec, io_vec);
1690                 po->iovmax = req->tp_frame_nr-1;
1691                 po->head = 0;
1692                 po->frame_size = req->tp_frame_size;
1693                 spin_unlock_bh(&sk->receive_queue.lock);
1694 
1695                 order = XC(po->pg_vec_order, order);
1696                 req->tp_block_nr = XC(po->pg_vec_len, req->tp_block_nr);
1697 
1698                 po->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
1699                 po->prot_hook.func = po->iovec ? tpacket_rcv : packet_rcv;
1700                 skb_queue_purge(&sk->receive_queue);
1701 #undef XC
1702                 if (atomic_read(&po->mapped))
1703                         printk(KERN_DEBUG "packet_mmap: vma is busy: %d\n", atomic_read(&po->mapped));
1704         }
1705 
1706         spin_lock(&po->bind_lock);
1707         if (po->running)
1708                 dev_add_pack(&po->prot_hook);
1709         spin_unlock(&po->bind_lock);
1710 
1711         release_sock(sk);
1712 
1713         if (io_vec)
1714                 kfree(io_vec);
1715 
1716 out_free_pgvec:
1717         if (pg_vec)
1718                 free_pg_vec(pg_vec, order, req->tp_block_nr);
1719 out:
1720         return err;
1721 }
1722 
1723 static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
1724 {
1725         struct sock *sk = sock->sk;
1726         struct packet_opt *po = sk->protinfo.af_packet;
1727         unsigned long size;
1728         unsigned long start;
1729         int err = -EINVAL;
1730         int i;
1731 
1732         if (vma->vm_pgoff)
1733                 return -EINVAL;
1734 
1735         size = vma->vm_end - vma->vm_start;
1736 
1737         lock_sock(sk);
1738         if (po->pg_vec == NULL)
1739                 goto out;
1740         if (size != po->pg_vec_len*po->pg_vec_pages*PAGE_SIZE)
1741                 goto out;
1742 
1743         atomic_inc(&po->mapped);
1744         start = vma->vm_start;
1745         err = -EAGAIN;
1746         for (i=0; i<po->pg_vec_len; i++) {
1747                 if (remap_page_range(start, __pa(po->pg_vec[i]),
1748                                      po->pg_vec_pages*PAGE_SIZE,
1749                                      vma->vm_page_prot))
1750                         goto out;
1751                 start += po->pg_vec_pages*PAGE_SIZE;
1752         }
1753         vma->vm_ops = &packet_mmap_ops;
1754         err = 0;
1755 
1756 out:
1757         release_sock(sk);
1758         return err;
1759 }
1760 #endif
1761 
1762 
1763 #ifdef CONFIG_SOCK_PACKET
1764 struct proto_ops packet_ops_spkt = {
1765         family:         PF_PACKET,
1766 
1767         release:        packet_release,
1768         bind:           packet_bind_spkt,
1769         connect:        sock_no_connect,
1770         socketpair:     sock_no_socketpair,
1771         accept:         sock_no_accept,
1772         getname:        packet_getname_spkt,
1773         poll:           datagram_poll,
1774         ioctl:          packet_ioctl,
1775         listen:         sock_no_listen,
1776         shutdown:       sock_no_shutdown,
1777         setsockopt:     sock_no_setsockopt,
1778         getsockopt:     sock_no_getsockopt,
1779         sendmsg:        packet_sendmsg_spkt,
1780         recvmsg:        packet_recvmsg,
1781         mmap:           sock_no_mmap,
1782 };
1783 #endif
1784 
1785 struct proto_ops packet_ops = {
1786         family:         PF_PACKET,
1787 
1788         release:        packet_release,
1789         bind:           packet_bind,
1790         connect:        sock_no_connect,
1791         socketpair:     sock_no_socketpair,
1792         accept:         sock_no_accept,
1793         getname:        packet_getname, 
1794         poll:           packet_poll,
1795         ioctl:          packet_ioctl,
1796         listen:         sock_no_listen,
1797         shutdown:       sock_no_shutdown,
1798         setsockopt:     packet_setsockopt,
1799         getsockopt:     packet_getsockopt,
1800         sendmsg:        packet_sendmsg,
1801         recvmsg:        packet_recvmsg,
1802         mmap:           packet_mmap,
1803 };
1804 
1805 static struct net_proto_family packet_family_ops = {
1806         PF_PACKET,
1807         packet_create
1808 };
1809 
1810 struct notifier_block packet_netdev_notifier={
1811         packet_notifier,
1812         NULL,
1813         0
1814 };
1815 
1816 #ifdef CONFIG_PROC_FS
1817 static int packet_read_proc(char *buffer, char **start, off_t offset,
1818                              int length, int *eof, void *data)
1819 {
1820         off_t pos=0;
1821         off_t begin=0;
1822         int len=0;
1823         struct sock *s;
1824         
1825         len+= sprintf(buffer,"sk       RefCnt Type Proto  Iface R Rmem   User   Inode\n");
1826 
1827         read_lock(&packet_sklist_lock);
1828 
1829         for (s = packet_sklist; s; s = s->next) {
1830                 len+=sprintf(buffer+len,"%p %-6d %-4d %04x   %-5d %1d %-6u %-6u %-6lu",
1831                              s,
1832                              atomic_read(&s->refcnt),
1833                              s->type,
1834                              ntohs(s->num),
1835                              s->protinfo.af_packet->ifindex,
1836                              s->protinfo.af_packet->running,
1837                              atomic_read(&s->rmem_alloc),
1838                              sock_i_uid(s),
1839                              sock_i_ino(s)
1840                              );
1841 
1842                 buffer[len++]='\n';
1843                 
1844                 pos=begin+len;
1845                 if(pos<offset) {
1846                         len=0;
1847                         begin=pos;
1848                 }
1849                 if(pos>offset+length)
1850                         goto done;
1851         }
1852         *eof = 1;
1853 
1854 done:
1855         read_unlock(&packet_sklist_lock);
1856         *start=buffer+(offset-begin);
1857         len-=(offset-begin);
1858         if(len>length)
1859                 len=length;
1860         if(len<0)
1861                 len=0;
1862         return len;
1863 }
1864 #endif
1865 
1866 
1867 
1868 static void __exit packet_exit(void)
1869 {
1870 #ifdef CONFIG_PROC_FS
1871         remove_proc_entry("net/packet", 0);
1872 #endif
1873         unregister_netdevice_notifier(&packet_netdev_notifier);
1874         sock_unregister(PF_PACKET);
1875         return;
1876 }
1877 
1878 
1879 static int __init packet_init(void)
1880 {
1881         sock_register(&packet_family_ops);
1882         register_netdevice_notifier(&packet_netdev_notifier);
1883 #ifdef CONFIG_PROC_FS
1884         create_proc_read_entry("net/packet", 0, 0, packet_read_proc, NULL);
1885 #endif
1886         return 0;
1887 }
1888 
1889 
/* Hook packet_init()/packet_exit() up as this module's load and unload entry points. */
module_init(packet_init);
module_exit(packet_exit);
1892 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.