1 /*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * PACKET - implements raw packet sockets.
7 *
8 * Version: $Id: af_packet.c,v 1.47 2000/12/08 17:15:54 davem Exp $
9 *
10 * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Alan Cox, <gw4pts@gw4pts.ampr.org>
13 *
14 * Fixes:
15 * Alan Cox : verify_area() now used correctly
16 * Alan Cox : new skbuff lists, look ma no backlogs!
17 * Alan Cox : tidied skbuff lists.
18 * Alan Cox : Now uses generic datagram routines I
19 * added. Also fixed the peek/read crash
20 * from all old Linux datagram code.
21 * Alan Cox : Uses the improved datagram code.
22 * Alan Cox : Added NULL's for socket options.
23 * Alan Cox : Re-commented the code.
24 * Alan Cox : Use new kernel side addressing
25 * Rob Janssen : Correct MTU usage.
26 * Dave Platt : Counter leaks caused by incorrect
27 * interrupt locking and some slightly
28 * dubious gcc output. Can you read
29 * compiler: it said _VOLATILE_
30 * Richard Kooijman : Timestamp fixes.
31 * Alan Cox : New buffers. Use sk->mac.raw.
32 * Alan Cox : sendmsg/recvmsg support.
33 * Alan Cox : Protocol setting support
34 * Alexey Kuznetsov : Untied from IPv4 stack.
35 * Cyrus Durgin : Fixed kerneld for kmod.
36 * Michal Ostrowski : Module initialization cleanup.
37 *
38 * This program is free software; you can redistribute it and/or
39 * modify it under the terms of the GNU General Public License
40 * as published by the Free Software Foundation; either version
41 * 2 of the License, or (at your option) any later version.
42 *
43 */
44
45 #include <linux/config.h>
46 #include <linux/types.h>
47 #include <linux/sched.h>
48 #include <linux/mm.h>
49 #include <linux/fcntl.h>
50 #include <linux/socket.h>
51 #include <linux/in.h>
52 #include <linux/inet.h>
53 #include <linux/netdevice.h>
54 #include <linux/if_packet.h>
55 #include <linux/wireless.h>
56 #include <linux/kmod.h>
57 #include <net/ip.h>
58 #include <net/protocol.h>
59 #include <linux/skbuff.h>
60 #include <net/sock.h>
61 #include <linux/errno.h>
62 #include <linux/timer.h>
63 #include <asm/system.h>
64 #include <asm/uaccess.h>
65 #include <linux/proc_fs.h>
66 #include <linux/poll.h>
67 #include <linux/module.h>
68 #include <linux/init.h>
69 #include <linux/if_bridge.h>
70
71 #ifdef CONFIG_NET_DIVERT
72 #include <linux/divert.h>
73 #endif /* CONFIG_NET_DIVERT */
74
75 #ifdef CONFIG_INET
76 #include <net/inet_common.h>
77 #endif
78
79 #ifdef CONFIG_DLCI
80 extern int dlci_ioctl(unsigned int, void*);
81 #endif
82
83 #define CONFIG_SOCK_PACKET 1
84
/*
   Proposed replacement for SIOC{ADD,DEL}MULTI and the
   IFF_PROMISC, IFF_ALLMULTI flags.

   It is more expensive, but I believe it is the really correct
   solution: reentrant, safe and fault tolerant.

   IFF_PROMISC/IFF_ALLMULTI/SIOC{ADD/DEL}MULTI are faked by keeping
   a reference count and a global flag, so that the real status is
   (gflag|(count != 0)). This way the obsolete, faulty interface can
   still be used without harming clever users.
 */
97 #define CONFIG_PACKET_MULTICAST 1
98
99 /*
100 Assumptions:
101 - if device has no dev->hard_header routine, it adds and removes ll header
102 inside itself. In this case ll header is invisible outside of device,
103 but higher levels still should reserve dev->hard_header_len.
104 Some devices are enough clever to reallocate skb, when header
105 will not fit to reserved space (tunnel), another ones are silly
106 (PPP).
107 - packet socket receives packets with pulled ll header,
108 so that SOCK_RAW should push it back.
109
110 On receive:
111 -----------
112
113 Incoming, dev->hard_header!=NULL
114 mac.raw -> ll header
115 data -> data
116
117 Outgoing, dev->hard_header!=NULL
118 mac.raw -> ll header
119 data -> ll header
120
121 Incoming, dev->hard_header==NULL
   mac.raw -> UNKNOWN position. It very likely points to the ll header.
              PPP does this, which is wrong because it introduces an
              asymmetry between the rx and tx paths.
125 data -> data
126
127 Outgoing, dev->hard_header==NULL
128 mac.raw -> data. ll header is still not built!
129 data -> data
130
131 Resume
132 If dev->hard_header==NULL we are unlikely to restore sensible ll header.
133
134
135 On transmit:
136 ------------
137
138 dev->hard_header != NULL
139 mac.raw -> ll header
140 data -> ll header
141
142 dev->hard_header == NULL (ll header is added by device, we cannot control it)
143 mac.raw -> data
144 data -> data
145
   We should set nh.raw on output to the correct position;
   the packet classifier depends on it.
148 */
149
150 /* List of all packet sockets. */
151 static struct sock * packet_sklist = NULL;
152 static rwlock_t packet_sklist_lock = RW_LOCK_UNLOCKED;
153
154 atomic_t packet_socks_nr;
155
156
157 /* Private packet socket structures. */
158
#ifdef CONFIG_PACKET_MULTICAST
/*
 *	One membership request made with PACKET_ADD_MEMBERSHIP.
 *	The entries of a socket are chained off packet_opt.mclist.
 */
struct packet_mclist
{
	struct packet_mclist *next;	/* next entry of the same socket */
	int ifindex;			/* interface the request applies to */
	int count;			/* identical requests folded into this entry */
	unsigned short type;		/* PACKET_MR_MULTICAST, _PROMISC or _ALLMULTI */
	unsigned short alen;		/* number of bytes used in addr[] */
	unsigned char addr[8];		/* address copied from the request */
};
#endif
#ifdef CONFIG_PACKET_MMAP
/* Forward declaration: packet_release() and packet_setsockopt() call it. */
static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing);
#endif

/* Forward declaration: packet_release() calls it before the definition. */
static void packet_flush_mclist(struct sock *sk);
175
/*
 *	Private per-socket state, reached through sk->protinfo.af_packet.
 */
struct packet_opt
{
	struct packet_type prot_hook;	/* receive hook given to dev_add_pack() */
	spinlock_t bind_lock;		/* held by packet_do_bind() while rehooking */
	char running; /* prot_hook is attached*/
	int ifindex; /* bound device */
	struct tpacket_stats stats;	/* tp_packets / tp_drops counters */
#ifdef CONFIG_PACKET_MULTICAST
	struct packet_mclist *mclist;	/* membership requests of this socket */
#endif
#ifdef CONFIG_PACKET_MMAP
	/* NOTE(review): the users of mapped and pg_vec_* are outside this view. */
	atomic_t mapped;
	unsigned long *pg_vec;		/* non-NULL makes packet_release() tear the ring down */
	unsigned int pg_vec_order;
	unsigned int pg_vec_pages;
	unsigned int pg_vec_len;

	struct tpacket_hdr **iovec;	/* frame headers, indexed by head */
	unsigned int frame_size;	/* room available in one frame */
	unsigned int iovmax;		/* last valid index into iovec */
	unsigned int head;		/* frame the next packet is written to */
	int copy_thresh;		/* non-zero: queue an skb too when a packet does not fit a frame */
#endif
};
200
201 void packet_sock_destruct(struct sock *sk)
202 {
203 BUG_TRAP(atomic_read(&sk->rmem_alloc)==0);
204 BUG_TRAP(atomic_read(&sk->wmem_alloc)==0);
205
206 if (!sk->dead) {
207 printk("Attempt to release alive packet socket: %p\n", sk);
208 return;
209 }
210
211 if (sk->protinfo.destruct_hook)
212 kfree(sk->protinfo.destruct_hook);
213 atomic_dec(&packet_socks_nr);
214 #ifdef PACKET_REFCNT_DEBUG
215 printk(KERN_DEBUG "PACKET socket %p is free, %d are alive\n", sk, atomic_read(&packet_socks_nr));
216 #endif
217 MOD_DEC_USE_COUNT;
218 }
219
220
221 extern struct proto_ops packet_ops;
222
223 #ifdef CONFIG_SOCK_PACKET
224 extern struct proto_ops packet_ops_spkt;
225
226 static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
227 {
228 struct sock *sk;
229 struct sockaddr_pkt *spkt;
230
231 /*
232 * When we registered the protocol we saved the socket in the data
233 * field for just this event.
234 */
235
236 sk = (struct sock *) pt->data;
237
238 /*
239 * Yank back the headers [hope the device set this
240 * right or kerboom...]
241 *
242 * Incoming packets have ll header pulled,
243 * push it back.
244 *
245 * For outgoing ones skb->data == skb->mac.raw
246 * so that this procedure is noop.
247 */
248
249 if (skb->pkt_type == PACKET_LOOPBACK)
250 goto out;
251
252 if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
253 goto oom;
254
255 spkt = (struct sockaddr_pkt*)skb->cb;
256
257 skb_push(skb, skb->data-skb->mac.raw);
258
259 /*
260 * The SOCK_PACKET socket receives _all_ frames.
261 */
262
263 spkt->spkt_family = dev->type;
264 strncpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
265 spkt->spkt_protocol = skb->protocol;
266
267 /*
268 * Charge the memory to the socket. This is done specifically
269 * to prevent sockets using all the memory up.
270 */
271
272 if (sock_queue_rcv_skb(sk,skb) == 0)
273 return 0;
274
275 out:
276 kfree_skb(skb);
277 oom:
278 return 0;
279 }
280
281
282 /*
283 * Output a raw packet to a device layer. This bypasses all the other
284 * protocol layers and you must therefore supply it with a complete frame
285 */
286
287 static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg, int len,
288 struct scm_cookie *scm)
289 {
290 struct sock *sk = sock->sk;
291 struct sockaddr_pkt *saddr=(struct sockaddr_pkt *)msg->msg_name;
292 struct sk_buff *skb;
293 struct net_device *dev;
294 unsigned short proto=0;
295 int err;
296
297 /*
298 * Get and verify the address.
299 */
300
301 if (saddr)
302 {
303 if (msg->msg_namelen < sizeof(struct sockaddr))
304 return(-EINVAL);
305 if (msg->msg_namelen==sizeof(struct sockaddr_pkt))
306 proto=saddr->spkt_protocol;
307 }
308 else
309 return(-ENOTCONN); /* SOCK_PACKET must be sent giving an address */
310
311 /*
312 * Find the device first to size check it
313 */
314
315 saddr->spkt_device[13] = 0;
316 dev = dev_get_by_name(saddr->spkt_device);
317 err = -ENODEV;
318 if (dev == NULL)
319 goto out_unlock;
320
321 /*
322 * You may not queue a frame bigger than the mtu. This is the lowest level
323 * raw protocol and you must do your own fragmentation at this level.
324 */
325
326 err = -EMSGSIZE;
327 if(len>dev->mtu+dev->hard_header_len)
328 goto out_unlock;
329
330 err = -ENOBUFS;
331 skb = sock_wmalloc(sk, len+dev->hard_header_len+15, 0, GFP_KERNEL);
332
333 /*
334 * If the write buffer is full, then tough. At this level the user gets to
335 * deal with the problem - do your own algorithmic backoffs. That's far
336 * more flexible.
337 */
338
339 if (skb == NULL)
340 goto out_unlock;
341
342 /*
343 * Fill it in
344 */
345
346 /* FIXME: Save some space for broken drivers that write a
347 * hard header at transmission time by themselves. PPP is the
348 * notable one here. This should really be fixed at the driver level.
349 */
350 skb_reserve(skb,(dev->hard_header_len+15)&~15);
351 skb->nh.raw = skb->data;
352
353 /* Try to align data part correctly */
354 if (dev->hard_header) {
355 skb->data -= dev->hard_header_len;
356 skb->tail -= dev->hard_header_len;
357 }
358
359 /* Returns -EFAULT on error */
360 err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
361 skb->protocol = proto;
362 skb->dev = dev;
363 skb->priority = sk->priority;
364 if (err)
365 goto out_free;
366
367 err = -ENETDOWN;
368 if (!(dev->flags & IFF_UP))
369 goto out_free;
370
371 /*
372 * Now send it
373 */
374
375 dev_queue_xmit(skb);
376 dev_put(dev);
377 return(len);
378
379 out_free:
380 kfree_skb(skb);
381 out_unlock:
382 if (dev)
383 dev_put(dev);
384 return err;
385 }
386 #endif
387
388 /*
389 This function makes lazy skb cloning in hope that most of packets
390 are discarded by BPF.
391
392 Note tricky part: we DO mangle shared skb! skb->data, skb->len
393 and skb->cb are mangled. It works because (and until) packets
394 falling here are owned by current CPU. Output packets are cloned
395 by dev_queue_xmit_nit(), input packets are processed by net_bh
396 sequencially, so that if we return skb to original state on exit,
397 we will not harm anyone.
398 */
399
400 static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
401 {
402 struct sock *sk;
403 struct sockaddr_ll *sll;
404 struct packet_opt *po;
405 u8 * skb_head = skb->data;
406 #ifdef CONFIG_FILTER
407 unsigned snaplen;
408 #endif
409
410 if (skb->pkt_type == PACKET_LOOPBACK)
411 goto drop;
412
413 sk = (struct sock *) pt->data;
414 po = sk->protinfo.af_packet;
415
416 skb->dev = dev;
417
418 if (dev->hard_header) {
419 /* The device has an explicit notion of ll header,
420 exported to higher levels.
421
422 Otherwise, the device hides datails of it frame
423 structure, so that corresponding packet head
424 never delivered to user.
425 */
426 if (sk->type != SOCK_DGRAM)
427 skb_push(skb, skb->data - skb->mac.raw);
428 else if (skb->pkt_type == PACKET_OUTGOING) {
429 /* Special case: outgoing packets have ll header at head */
430 skb_pull(skb, skb->nh.raw - skb->data);
431 }
432 }
433
434 #ifdef CONFIG_FILTER
435 snaplen = skb->len;
436
437 if (sk->filter) {
438 unsigned res = snaplen;
439 struct sk_filter *filter;
440
441 bh_lock_sock(sk);
442 if ((filter = sk->filter) != NULL)
443 res = sk_run_filter(skb, sk->filter->insns, sk->filter->len);
444 bh_unlock_sock(sk);
445
446 if (res == 0)
447 goto drop_n_restore;
448 if (snaplen > res)
449 snaplen = res;
450 }
451 #endif /* CONFIG_FILTER */
452
453 if (atomic_read(&sk->rmem_alloc) + skb->truesize >= (unsigned)sk->rcvbuf)
454 goto drop_n_acct;
455
456 if (skb_shared(skb)) {
457 struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
458 if (nskb == NULL)
459 goto drop_n_acct;
460
461 if (skb_head != skb->data) {
462 skb->data = skb_head;
463 skb->len = skb->tail - skb->data;
464 }
465 kfree_skb(skb);
466 skb = nskb;
467 }
468
469 sll = (struct sockaddr_ll*)skb->cb;
470 sll->sll_family = AF_PACKET;
471 sll->sll_hatype = dev->type;
472 sll->sll_protocol = skb->protocol;
473 sll->sll_pkttype = skb->pkt_type;
474 sll->sll_ifindex = dev->ifindex;
475 sll->sll_halen = 0;
476
477 if (dev->hard_header_parse)
478 sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
479
480 #ifdef CONFIG_FILTER
481 if (skb->len > snaplen)
482 __skb_trim(skb, snaplen);
483 #endif
484
485 skb_set_owner_r(skb, sk);
486 skb->dev = NULL;
487 spin_lock(&sk->receive_queue.lock);
488 po->stats.tp_packets++;
489 __skb_queue_tail(&sk->receive_queue, skb);
490 spin_unlock(&sk->receive_queue.lock);
491 sk->data_ready(sk,skb->len);
492 return 0;
493
494 drop_n_acct:
495 spin_lock(&sk->receive_queue.lock);
496 po->stats.tp_drops++;
497 spin_unlock(&sk->receive_queue.lock);
498
499 #ifdef CONFIG_FILTER
500 drop_n_restore:
501 #endif
502 if (skb_head != skb->data && skb_shared(skb)) {
503 skb->data = skb_head;
504 skb->len = skb->tail - skb->data;
505 }
506 drop:
507 kfree_skb(skb);
508 return 0;
509 }
510
511 #ifdef CONFIG_PACKET_MMAP
512 static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
513 {
514 struct sock *sk;
515 struct packet_opt *po;
516 struct sockaddr_ll *sll;
517 struct tpacket_hdr *h;
518 u8 * skb_head = skb->data;
519 unsigned snaplen;
520 unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
521 unsigned short macoff, netoff;
522 struct sk_buff *copy_skb = NULL;
523
524 if (skb->pkt_type == PACKET_LOOPBACK)
525 goto drop;
526
527 sk = (struct sock *) pt->data;
528 po = sk->protinfo.af_packet;
529
530 if (dev->hard_header) {
531 if (sk->type != SOCK_DGRAM)
532 skb_push(skb, skb->data - skb->mac.raw);
533 else if (skb->pkt_type == PACKET_OUTGOING) {
534 /* Special case: outgoing packets have ll header at head */
535 skb_pull(skb, skb->nh.raw - skb->data);
536 }
537 }
538
539 snaplen = skb->len;
540
541 #ifdef CONFIG_FILTER
542 if (sk->filter) {
543 unsigned res = snaplen;
544 struct sk_filter *filter;
545
546 bh_lock_sock(sk);
547 if ((filter = sk->filter) != NULL)
548 res = sk_run_filter(skb, sk->filter->insns, sk->filter->len);
549 bh_unlock_sock(sk);
550
551 if (res == 0)
552 goto drop_n_restore;
553 if (snaplen > res)
554 snaplen = res;
555 }
556 #endif
557
558 if (sk->type == SOCK_DGRAM) {
559 macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
560 } else {
561 unsigned maclen = skb->nh.raw - skb->data;
562 netoff = TPACKET_ALIGN(TPACKET_HDRLEN + (maclen < 16 ? 16 : maclen));
563 macoff = netoff - maclen;
564 }
565
566 if (macoff + snaplen > po->frame_size) {
567 if (po->copy_thresh &&
568 atomic_read(&sk->rmem_alloc) + skb->truesize < (unsigned)sk->rcvbuf) {
569 if (skb_shared(skb)) {
570 copy_skb = skb_clone(skb, GFP_ATOMIC);
571 } else {
572 copy_skb = skb_get(skb);
573 skb_head = skb->data;
574 }
575 if (copy_skb)
576 skb_set_owner_r(copy_skb, sk);
577 }
578 snaplen = po->frame_size - macoff;
579 if ((int)snaplen < 0)
580 snaplen = 0;
581 }
582
583 spin_lock(&sk->receive_queue.lock);
584 h = po->iovec[po->head];
585
586 if (h->tp_status)
587 goto ring_is_full;
588 po->head = po->head != po->iovmax ? po->head+1 : 0;
589 po->stats.tp_packets++;
590 if (copy_skb) {
591 status |= TP_STATUS_COPY;
592 __skb_queue_tail(&sk->receive_queue, copy_skb);
593 }
594 if (!po->stats.tp_drops)
595 status &= ~TP_STATUS_LOSING;
596 spin_unlock(&sk->receive_queue.lock);
597
598 memcpy((u8*)h + macoff, skb->data, snaplen);
599
600 h->tp_len = skb->len;
601 h->tp_snaplen = snaplen;
602 h->tp_mac = macoff;
603 h->tp_net = netoff;
604 h->tp_sec = skb->stamp.tv_sec;
605 h->tp_usec = skb->stamp.tv_usec;
606
607 sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h)));
608 sll->sll_halen = 0;
609 if (dev->hard_header_parse)
610 sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
611 sll->sll_family = AF_PACKET;
612 sll->sll_hatype = dev->type;
613 sll->sll_protocol = skb->protocol;
614 sll->sll_pkttype = skb->pkt_type;
615 sll->sll_ifindex = dev->ifindex;
616
617 h->tp_status = status;
618 mb();
619
620 sk->data_ready(sk, 0);
621
622 drop_n_restore:
623 if (skb_head != skb->data && skb_shared(skb)) {
624 skb->data = skb_head;
625 skb->len = skb->tail - skb->data;
626 }
627 drop:
628 kfree_skb(skb);
629 return 0;
630
631 ring_is_full:
632 po->stats.tp_drops++;
633 spin_unlock(&sk->receive_queue.lock);
634
635 sk->data_ready(sk, 0);
636 if (copy_skb)
637 kfree_skb(copy_skb);
638 goto drop_n_restore;
639 }
640
641 #endif
642
643
644 static int packet_sendmsg(struct socket *sock, struct msghdr *msg, int len,
645 struct scm_cookie *scm)
646 {
647 struct sock *sk = sock->sk;
648 struct sockaddr_ll *saddr=(struct sockaddr_ll *)msg->msg_name;
649 struct sk_buff *skb;
650 struct net_device *dev;
651 unsigned short proto;
652 unsigned char *addr;
653 int ifindex, err, reserve = 0;
654
655 /*
656 * Get and verify the address.
657 */
658
659 if (saddr == NULL) {
660 ifindex = sk->protinfo.af_packet->ifindex;
661 proto = sk->num;
662 addr = NULL;
663 } else {
664 err = -EINVAL;
665 if (msg->msg_namelen < sizeof(struct sockaddr_ll))
666 goto out;
667 ifindex = saddr->sll_ifindex;
668 proto = saddr->sll_protocol;
669 addr = saddr->sll_addr;
670 }
671
672
673 dev = dev_get_by_index(ifindex);
674 err = -ENXIO;
675 if (dev == NULL)
676 goto out_unlock;
677 if (sock->type == SOCK_RAW)
678 reserve = dev->hard_header_len;
679
680 err = -EMSGSIZE;
681 if (len > dev->mtu+reserve)
682 goto out_unlock;
683
684 skb = sock_alloc_send_skb(sk, len+dev->hard_header_len+15, 0,
685 msg->msg_flags & MSG_DONTWAIT, &err);
686 if (skb==NULL)
687 goto out_unlock;
688
689 skb_reserve(skb, (dev->hard_header_len+15)&~15);
690 skb->nh.raw = skb->data;
691
692 if (dev->hard_header) {
693 int res;
694 err = -EINVAL;
695 res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len);
696 if (sock->type != SOCK_DGRAM) {
697 skb->tail = skb->data;
698 skb->len = 0;
699 } else if (res < 0)
700 goto out_free;
701 }
702
703 /* Returns -EFAULT on error */
704 err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
705 if (err)
706 goto out_free;
707
708 skb->protocol = proto;
709 skb->dev = dev;
710 skb->priority = sk->priority;
711
712 err = -ENETDOWN;
713 if (!(dev->flags & IFF_UP))
714 goto out_free;
715
716 /*
717 * Now send it
718 */
719
720 err = dev_queue_xmit(skb);
721 if (err > 0 && (err = net_xmit_errno(err)) != 0)
722 goto out_unlock;
723
724 dev_put(dev);
725
726 return(len);
727
728 out_free:
729 kfree_skb(skb);
730 out_unlock:
731 if (dev)
732 dev_put(dev);
733 out:
734 return err;
735 }
736
737 /*
738 * Close a PACKET socket. This is fairly simple. We immediately go
739 * to 'closed' state and remove our protocol entry in the device list.
740 */
741
742 static int packet_release(struct socket *sock)
743 {
744 struct sock *sk = sock->sk;
745 struct sock **skp;
746
747 if (!sk)
748 return 0;
749
750 write_lock_bh(&packet_sklist_lock);
751 for (skp = &packet_sklist; *skp; skp = &(*skp)->next) {
752 if (*skp == sk) {
753 *skp = sk->next;
754 __sock_put(sk);
755 break;
756 }
757 }
758 write_unlock_bh(&packet_sklist_lock);
759
760 /*
761 * Unhook packet receive handler.
762 */
763
764 if (sk->protinfo.af_packet->running) {
765 /*
766 * Remove the protocol hook
767 */
768 dev_remove_pack(&sk->protinfo.af_packet->prot_hook);
769 sk->protinfo.af_packet->running = 0;
770 __sock_put(sk);
771 }
772
773 #ifdef CONFIG_PACKET_MULTICAST
774 packet_flush_mclist(sk);
775 #endif
776
777 #ifdef CONFIG_PACKET_MMAP
778 if (sk->protinfo.af_packet->pg_vec) {
779 struct tpacket_req req;
780 memset(&req, 0, sizeof(req));
781 packet_set_ring(sk, &req, 1);
782 }
783 #endif
784
785 /*
786 * Now the socket is dead. No more input will appear.
787 */
788
789 sock_orphan(sk);
790 sock->sk = NULL;
791
792 /* Purge queues */
793
794 skb_queue_purge(&sk->receive_queue);
795
796 sock_put(sk);
797 return 0;
798 }
799
800 /*
801 * Attach a packet hook.
802 */
803
804 static int packet_do_bind(struct sock *sk, struct net_device *dev, int protocol)
805 {
806 /*
807 * Detach an existing hook if present.
808 */
809
810 lock_sock(sk);
811
812 spin_lock(&sk->protinfo.af_packet->bind_lock);
813 if (sk->protinfo.af_packet->running) {
814 dev_remove_pack(&sk->protinfo.af_packet->prot_hook);
815 __sock_put(sk);
816 sk->protinfo.af_packet->running = 0;
817 }
818
819 sk->num = protocol;
820 sk->protinfo.af_packet->prot_hook.type = protocol;
821 sk->protinfo.af_packet->prot_hook.dev = dev;
822
823 sk->protinfo.af_packet->ifindex = dev ? dev->ifindex : 0;
824
825 if (protocol == 0)
826 goto out_unlock;
827
828 if (dev) {
829 if (dev->flags&IFF_UP) {
830 dev_add_pack(&sk->protinfo.af_packet->prot_hook);
831 sock_hold(sk);
832 sk->protinfo.af_packet->running = 1;
833 } else {
834 sk->err = ENETDOWN;
835 if (!sk->dead)
836 sk->error_report(sk);
837 }
838 } else {
839 dev_add_pack(&sk->protinfo.af_packet->prot_hook);
840 sock_hold(sk);
841 sk->protinfo.af_packet->running = 1;
842 }
843
844 out_unlock:
845 spin_unlock(&sk->protinfo.af_packet->bind_lock);
846 release_sock(sk);
847 return 0;
848 }
849
850 /*
851 * Bind a packet socket to a device
852 */
853
854 #ifdef CONFIG_SOCK_PACKET
855
856 static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len)
857 {
858 struct sock *sk=sock->sk;
859 char name[15];
860 struct net_device *dev;
861 int err = -ENODEV;
862
863 /*
864 * Check legality
865 */
866
867 if(addr_len!=sizeof(struct sockaddr))
868 return -EINVAL;
869 strncpy(name,uaddr->sa_data,14);
870 name[14]=0;
871
872 dev = dev_get_by_name(name);
873 if (dev) {
874 err = packet_do_bind(sk, dev, sk->num);
875 dev_put(dev);
876 }
877 return err;
878 }
879 #endif
880
881 static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
882 {
883 struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
884 struct sock *sk=sock->sk;
885 struct net_device *dev = NULL;
886 int err;
887
888
889 /*
890 * Check legality
891 */
892
893 if (addr_len < sizeof(struct sockaddr_ll))
894 return -EINVAL;
895 if (sll->sll_family != AF_PACKET)
896 return -EINVAL;
897
898 if (sll->sll_ifindex) {
899 err = -ENODEV;
900 dev = dev_get_by_index(sll->sll_ifindex);
901 if (dev == NULL)
902 goto out;
903 }
904 err = packet_do_bind(sk, dev, sll->sll_protocol ? : sk->num);
905 if (dev)
906 dev_put(dev);
907
908 out:
909 return err;
910 }
911
912
913 /*
914 * Create a packet of type SOCK_PACKET.
915 */
916
917 static int packet_create(struct socket *sock, int protocol)
918 {
919 struct sock *sk;
920 int err;
921
922 if (!capable(CAP_NET_RAW))
923 return -EPERM;
924 if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW
925 #ifdef CONFIG_SOCK_PACKET
926 && sock->type != SOCK_PACKET
927 #endif
928 )
929 return -ESOCKTNOSUPPORT;
930
931 sock->state = SS_UNCONNECTED;
932 MOD_INC_USE_COUNT;
933
934 err = -ENOBUFS;
935 sk = sk_alloc(PF_PACKET, GFP_KERNEL, 1);
936 if (sk == NULL)
937 goto out;
938
939 sock->ops = &packet_ops;
940 #ifdef CONFIG_SOCK_PACKET
941 if (sock->type == SOCK_PACKET)
942 sock->ops = &packet_ops_spkt;
943 #endif
944 sock_init_data(sock,sk);
945
946 sk->protinfo.af_packet = kmalloc(sizeof(struct packet_opt), GFP_KERNEL);
947 if (sk->protinfo.af_packet == NULL)
948 goto out_free;
949 memset(sk->protinfo.af_packet, 0, sizeof(struct packet_opt));
950 sk->family = PF_PACKET;
951 sk->num = protocol;
952
953 sk->destruct = packet_sock_destruct;
954 atomic_inc(&packet_socks_nr);
955
956 /*
957 * Attach a protocol block
958 */
959
960 spin_lock_init(&sk->protinfo.af_packet->bind_lock);
961 sk->protinfo.af_packet->prot_hook.func = packet_rcv;
962 #ifdef CONFIG_SOCK_PACKET
963 if (sock->type == SOCK_PACKET)
964 sk->protinfo.af_packet->prot_hook.func = packet_rcv_spkt;
965 #endif
966 sk->protinfo.af_packet->prot_hook.data = (void *)sk;
967
968 if (protocol) {
969 sk->protinfo.af_packet->prot_hook.type = protocol;
970 dev_add_pack(&sk->protinfo.af_packet->prot_hook);
971 sock_hold(sk);
972 sk->protinfo.af_packet->running = 1;
973 }
974
975 write_lock_bh(&packet_sklist_lock);
976 sk->next = packet_sklist;
977 packet_sklist = sk;
978 sock_hold(sk);
979 write_unlock_bh(&packet_sklist_lock);
980 return(0);
981
982 out_free:
983 sk_free(sk);
984 out:
985 MOD_DEC_USE_COUNT;
986 return err;
987 }
988
989 /*
990 * Pull a packet from our receive queue and hand it to the user.
991 * If necessary we block.
992 */
993
994 static int packet_recvmsg(struct socket *sock, struct msghdr *msg, int len,
995 int flags, struct scm_cookie *scm)
996 {
997 struct sock *sk = sock->sk;
998 struct sk_buff *skb;
999 int copied, err;
1000
1001 err = -EINVAL;
1002 if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC))
1003 goto out;
1004
1005 #if 0
1006 /* What error should we return now? EUNATTACH? */
1007 if (sk->protinfo.af_packet->ifindex < 0)
1008 return -ENODEV;
1009 #endif
1010
1011 /*
1012 * If the address length field is there to be filled in, we fill
1013 * it in now.
1014 */
1015
1016 if (sock->type == SOCK_PACKET)
1017 msg->msg_namelen = sizeof(struct sockaddr_pkt);
1018 else
1019 msg->msg_namelen = sizeof(struct sockaddr_ll);
1020
1021 /*
1022 * Call the generic datagram receiver. This handles all sorts
1023 * of horrible races and re-entrancy so we can forget about it
1024 * in the protocol layers.
1025 *
1026 * Now it will return ENETDOWN, if device have just gone down,
1027 * but then it will block.
1028 */
1029
1030 skb=skb_recv_datagram(sk,flags,flags&MSG_DONTWAIT,&err);
1031
1032 /*
1033 * An error occurred so return it. Because skb_recv_datagram()
1034 * handles the blocking we don't see and worry about blocking
1035 * retries.
1036 */
1037
1038 if(skb==NULL)
1039 goto out;
1040
1041 /*
1042 * You lose any data beyond the buffer you gave. If it worries a
1043 * user program they can ask the device for its MTU anyway.
1044 */
1045
1046 copied = skb->len;
1047 if (copied > len)
1048 {
1049 copied=len;
1050 msg->msg_flags|=MSG_TRUNC;
1051 }
1052
1053 /* We can't use skb_copy_datagram here */
1054 err = memcpy_toiovec(msg->msg_iov, skb->data, copied);
1055 if (err)
1056 goto out_free;
1057
1058 sock_recv_timestamp(msg, sk, skb);
1059
1060 if (msg->msg_name)
1061 memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
1062
1063 /*
1064 * Free or return the buffer as appropriate. Again this
1065 * hides all the races and re-entrancy issues from us.
1066 */
1067 err = (flags&MSG_TRUNC) ? skb->len : copied;
1068
1069 out_free:
1070 skb_free_datagram(sk, skb);
1071 out:
1072 return err;
1073 }
1074
1075 #ifdef CONFIG_SOCK_PACKET
1076 static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
1077 int *uaddr_len, int peer)
1078 {
1079 struct net_device *dev;
1080 struct sock *sk = sock->sk;
1081
1082 if (peer)
1083 return -EOPNOTSUPP;
1084
1085 uaddr->sa_family = AF_PACKET;
1086 dev = dev_get_by_index(sk->protinfo.af_packet->ifindex);
1087 if (dev) {
1088 strncpy(uaddr->sa_data, dev->name, 15);
1089 dev_put(dev);
1090 } else
1091 memset(uaddr->sa_data, 0, 14);
1092 *uaddr_len = sizeof(*uaddr);
1093
1094 return 0;
1095 }
1096 #endif
1097
1098 static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
1099 int *uaddr_len, int peer)
1100 {
1101 struct net_device *dev;
1102 struct sock *sk = sock->sk;
1103 struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
1104
1105 if (peer)
1106 return -EOPNOTSUPP;
1107
1108 sll->sll_family = AF_PACKET;
1109 sll->sll_ifindex = sk->protinfo.af_packet->ifindex;
1110 sll->sll_protocol = sk->num;
1111 dev = dev_get_by_index(sk->protinfo.af_packet->ifindex);
1112 if (dev) {
1113 sll->sll_hatype = dev->type;
1114 sll->sll_halen = dev->addr_len;
1115 memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
1116 dev_put(dev);
1117 } else {
1118 sll->sll_hatype = 0; /* Bad: we have no ARPHRD_UNSPEC */
1119 sll->sll_halen = 0;
1120 }
1121 *uaddr_len = sizeof(*sll);
1122
1123 return 0;
1124 }
1125
1126 #ifdef CONFIG_PACKET_MULTICAST
1127 static void packet_dev_mc(struct net_device *dev, struct packet_mclist *i, int what)
1128 {
1129 switch (i->type) {
1130 case PACKET_MR_MULTICAST:
1131 if (what > 0)
1132 dev_mc_add(dev, i->addr, i->alen, 0);
1133 else
1134 dev_mc_delete(dev, i->addr, i->alen, 0);
1135 break;
1136 case PACKET_MR_PROMISC:
1137 dev_set_promiscuity(dev, what);
1138 break;
1139 case PACKET_MR_ALLMULTI:
1140 dev_set_allmulti(dev, what);
1141 break;
1142 default:;
1143 }
1144 }
1145
1146 static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
1147 {
1148 for ( ; i; i=i->next) {
1149 if (i->ifindex == dev->ifindex)
1150 packet_dev_mc(dev, i, what);
1151 }
1152 }
1153
1154 static int packet_mc_add(struct sock *sk, struct packet_mreq *mreq)
1155 {
1156 struct packet_mclist *ml, *i;
1157 struct net_device *dev;
1158 int err;
1159
1160 rtnl_lock();
1161
1162 err = -ENODEV;
1163 dev = __dev_get_by_index(mreq->mr_ifindex);
1164 if (!dev)
1165 goto done;
1166
1167 err = -EINVAL;
1168 if (mreq->mr_alen > dev->addr_len)
1169 goto done;
1170
1171 err = -ENOBUFS;
1172 i = (struct packet_mclist *)kmalloc(sizeof(*i), GFP_KERNEL);
1173 if (i == NULL)
1174 goto done;
1175
1176 err = 0;
1177 for (ml=sk->protinfo.af_packet->mclist; ml; ml=ml->next) {
1178 if (ml->ifindex == mreq->mr_ifindex &&
1179 ml->type == mreq->mr_type &&
1180 ml->alen == mreq->mr_alen &&
1181 memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1182 ml->count++;
1183 /* Free the new element ... */
1184 kfree(i);
1185 goto done;
1186 }
1187 }
1188
1189 i->type = mreq->mr_type;
1190 i->ifindex = mreq->mr_ifindex;
1191 i->alen = mreq->mr_alen;
1192 memcpy(i->addr, mreq->mr_address, i->alen);
1193 i->count = 1;
1194 i->next = sk->protinfo.af_packet->mclist;
1195 sk->protinfo.af_packet->mclist = i;
1196 packet_dev_mc(dev, i, +1);
1197
1198 done:
1199 rtnl_unlock();
1200 return err;
1201 }
1202
1203 static int packet_mc_drop(struct sock *sk, struct packet_mreq *mreq)
1204 {
1205 struct packet_mclist *ml, **mlp;
1206
1207 rtnl_lock();
1208
1209 for (mlp=&sk->protinfo.af_packet->mclist; (ml=*mlp)!=NULL; mlp=&ml->next) {
1210 if (ml->ifindex == mreq->mr_ifindex &&
1211 ml->type == mreq->mr_type &&
1212 ml->alen == mreq->mr_alen &&
1213 memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1214 if (--ml->count == 0) {
1215 struct net_device *dev;
1216 *mlp = ml->next;
1217 dev = dev_get_by_index(ml->ifindex);
1218 if (dev) {
1219 packet_dev_mc(dev, ml, -1);
1220 dev_put(dev);
1221 }
1222 kfree(ml);
1223 }
1224 rtnl_unlock();
1225 return 0;
1226 }
1227 }
1228 rtnl_unlock();
1229 return -EADDRNOTAVAIL;
1230 }
1231
1232 static void packet_flush_mclist(struct sock *sk)
1233 {
1234 struct packet_mclist *ml;
1235
1236 if (sk->protinfo.af_packet->mclist == NULL)
1237 return;
1238
1239 rtnl_lock();
1240 while ((ml=sk->protinfo.af_packet->mclist) != NULL) {
1241 struct net_device *dev;
1242 sk->protinfo.af_packet->mclist = ml->next;
1243 if ((dev = dev_get_by_index(ml->ifindex)) != NULL) {
1244 packet_dev_mc(dev, ml, -1);
1245 dev_put(dev);
1246 }
1247 kfree(ml);
1248 }
1249 rtnl_unlock();
1250 }
1251 #endif
1252
1253 static int
1254 packet_setsockopt(struct socket *sock, int level, int optname, char *optval, int optlen)
1255 {
1256 struct sock *sk = sock->sk;
1257 int ret;
1258
1259 if (level != SOL_PACKET)
1260 return -ENOPROTOOPT;
1261
1262 switch(optname) {
1263 #ifdef CONFIG_PACKET_MULTICAST
1264 case PACKET_ADD_MEMBERSHIP:
1265 case PACKET_DROP_MEMBERSHIP:
1266 {
1267 struct packet_mreq mreq;
1268 if (optlen<sizeof(mreq))
1269 return -EINVAL;
1270 if (copy_from_user(&mreq,optval,sizeof(mreq)))
1271 return -EFAULT;
1272 if (optname == PACKET_ADD_MEMBERSHIP)
1273 ret = packet_mc_add(sk, &mreq);
1274 else
1275 ret = packet_mc_drop(sk, &mreq);
1276 return ret;
1277 }
1278 #endif
1279 #ifdef CONFIG_PACKET_MMAP
1280 case PACKET_RX_RING:
1281 {
1282 struct tpacket_req req;
1283
1284 if (optlen<sizeof(req))
1285 return -EINVAL;
1286 if (copy_from_user(&req,optval,sizeof(req)))
1287 return -EFAULT;
1288 return packet_set_ring(sk, &req, 0);
1289 }
1290 case PACKET_COPY_THRESH:
1291 {
1292 int val;
1293
1294 if (optlen!=sizeof(val))
1295 return -EINVAL;
1296 if (copy_from_user(&val,optval,sizeof(val)))
1297 return -EFAULT;
1298
1299 sk->protinfo.af_packet->copy_thresh = val;
1300 return 0;
1301 }
1302 #endif
1303 default:
1304 return -ENOPROTOOPT;
1305 }
1306 }
1307
1308 int packet_getsockopt(struct socket *sock, int level, int optname,
1309 char *optval, int *optlen)
1310 {
1311 int len;
1312 struct sock *sk = sock->sk;
1313
1314 if (level != SOL_PACKET)
1315 return -ENOPROTOOPT;
1316
1317 if (get_user(len,optlen))
1318 return -EFAULT;
1319
1320 switch(optname) {
1321 case PACKET_STATISTICS:
1322 {
1323 struct tpacket_stats st;
1324
1325 if (len > sizeof(struct tpacket_stats))
1326 len = sizeof(struct tpacket_stats);
1327 spin_lock_bh(&sk->receive_queue.lock);
1328 st = sk->protinfo.af_packet->stats;
1329 memset(&sk->protinfo.af_packet->stats, 0, sizeof(st));
1330 spin_unlock_bh(&sk->receive_queue.lock);
1331 st.tp_packets += st.tp_drops;
1332
1333 if (copy_to_user(optval, &st, len))
1334 return -EFAULT;
1335 break;
1336 }
1337 default:
1338 return -ENOPROTOOPT;
1339 }
1340
1341 if (put_user(len, optlen))
1342 return -EFAULT;
1343 return 0;
1344 }
1345
1346
1347 static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
1348 {
1349 struct sock *sk;
1350 struct packet_opt *po;
1351 struct net_device *dev = (struct net_device*)data;
1352
1353 read_lock(&packet_sklist_lock);
1354 for (sk = packet_sklist; sk; sk = sk->next) {
1355 po = sk->protinfo.af_packet;
1356
1357 switch (msg) {
1358 case NETDEV_DOWN:
1359 case NETDEV_UNREGISTER:
1360 if (dev->ifindex == po->ifindex) {
1361 spin_lock(&po->bind_lock);
1362 if (po->running) {
1363 dev_remove_pack(&po->prot_hook);
1364 __sock_put(sk);
1365 po->running = 0;
1366 sk->err = ENETDOWN;
1367 if (!sk->dead)
1368 sk->error_report(sk);
1369 }
1370 if (msg == NETDEV_UNREGISTER) {
1371 po->ifindex = -1;
1372 po->prot_hook.dev = NULL;
1373 }
1374 spin_unlock(&po->bind_lock);
1375 }
1376 #ifdef CONFIG_PACKET_MULTICAST
1377 if (po->mclist)
1378 packet_dev_mclist(dev, po->mclist, -1);
1379 #endif
1380 break;
1381 case NETDEV_UP:
1382 spin_lock(&po->bind_lock);
1383 if (dev->ifindex == po->ifindex && sk->num && po->running==0) {
1384 dev_add_pack(&po->prot_hook);
1385 sock_hold(sk);
1386 po->running = 1;
1387 }
1388 spin_unlock(&po->bind_lock);
1389 #ifdef CONFIG_PACKET_MULTICAST
1390 if (po->mclist)
1391 packet_dev_mclist(dev, po->mclist, +1);
1392 #endif
1393 break;
1394 }
1395 }
1396 read_unlock(&packet_sklist_lock);
1397 return NOTIFY_DONE;
1398 }
1399
1400
1401 static int packet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1402 {
1403 struct sock *sk = sock->sk;
1404 int err;
1405 int pid;
1406
1407 switch(cmd)
1408 {
1409 case SIOCOUTQ:
1410 {
1411 int amount = atomic_read(&sk->wmem_alloc);
1412 return put_user(amount, (int *)arg);
1413 }
1414 case SIOCINQ:
1415 {
1416 struct sk_buff *skb;
1417 int amount = 0;
1418
1419 spin_lock_bh(&sk->receive_queue.lock);
1420 skb = skb_peek(&sk->receive_queue);
1421 if (skb)
1422 amount = skb->len;
1423 spin_unlock_bh(&sk->receive_queue.lock);
1424 return put_user(amount, (int *)arg);
1425 }
1426 case FIOSETOWN:
1427 case SIOCSPGRP:
1428 err = get_user(pid, (int *) arg);
1429 if (err)
1430 return err;
1431 if (current->pid != pid && current->pgrp != -pid &&
1432 !capable(CAP_NET_ADMIN))
1433 return -EPERM;
1434 sk->proc = pid;
1435 return(0);
1436 case FIOGETOWN:
1437 case SIOCGPGRP:
1438 return put_user(sk->proc, (int *)arg);
1439 case SIOCGSTAMP:
1440 if(sk->stamp.tv_sec==0)
1441 return -ENOENT;
1442 err = -EFAULT;
1443 if (!copy_to_user((void *)arg, &sk->stamp, sizeof(struct timeval)))
1444 err = 0;
1445 return err;
1446 case SIOCGIFFLAGS:
1447 #ifndef CONFIG_INET
1448 case SIOCSIFFLAGS:
1449 #endif
1450 case SIOCGIFCONF:
1451 case SIOCGIFMETRIC:
1452 case SIOCSIFMETRIC:
1453 case SIOCGIFMEM:
1454 case SIOCSIFMEM:
1455 case SIOCGIFMTU:
1456 case SIOCSIFMTU:
1457 case SIOCSIFLINK:
1458 case SIOCGIFHWADDR:
1459 case SIOCSIFHWADDR:
1460 case SIOCSIFMAP:
1461 case SIOCGIFMAP:
1462 case SIOCSIFSLAVE:
1463 case SIOCGIFSLAVE:
1464 case SIOCGIFINDEX:
1465 case SIOCGIFNAME:
1466 case SIOCGIFCOUNT:
1467 case SIOCSIFHWBROADCAST:
1468 return(dev_ioctl(cmd,(void *) arg));
1469
1470 case SIOCGIFBR:
1471 case SIOCSIFBR:
1472 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
1473 #ifdef CONFIG_INET
1474 #ifdef CONFIG_KMOD
1475 if (br_ioctl_hook == NULL)
1476 request_module("bridge");
1477 #endif
1478 if (br_ioctl_hook != NULL)
1479 return br_ioctl_hook(arg);
1480 #endif
1481 #endif
1482
1483 case SIOCGIFDIVERT:
1484 case SIOCSIFDIVERT:
1485 #ifdef CONFIG_NET_DIVERT
1486 return(divert_ioctl(cmd, (struct divert_cf *) arg));
1487 #else
1488 return -ENOPKG;
1489 #endif /* CONFIG_NET_DIVERT */
1490
1491 return -ENOPKG;
1492
1493 #ifdef CONFIG_INET
1494 case SIOCADDRT:
1495 case SIOCDELRT:
1496 case SIOCDARP:
1497 case SIOCGARP:
1498 case SIOCSARP:
1499 case SIOCGIFADDR:
1500 case SIOCSIFADDR:
1501 case SIOCGIFBRDADDR:
1502 case SIOCSIFBRDADDR:
1503 case SIOCGIFNETMASK:
1504 case SIOCSIFNETMASK:
1505 case SIOCGIFDSTADDR:
1506 case SIOCSIFDSTADDR:
1507 case SIOCSIFFLAGS:
1508 case SIOCADDDLCI:
1509 case SIOCDELDLCI:
1510 return inet_dgram_ops.ioctl(sock, cmd, arg);
1511 #endif
1512
1513 default:
1514 if ((cmd >= SIOCDEVPRIVATE) &&
1515 (cmd <= (SIOCDEVPRIVATE + 15)))
1516 return(dev_ioctl(cmd,(void *) arg));
1517
1518 #ifdef CONFIG_NET_RADIO
1519 if((cmd >= SIOCIWFIRST) && (cmd <= SIOCIWLAST))
1520 return(dev_ioctl(cmd,(void *) arg));
1521 #endif
1522 return -EOPNOTSUPP;
1523 }
1524 /*NOTREACHED*/
1525 return(0);
1526 }
1527
1528 #ifndef CONFIG_PACKET_MMAP
1529 #define packet_mmap sock_no_mmap
1530 #define packet_poll datagram_poll
1531 #else
1532
1533 unsigned int packet_poll(struct file * file, struct socket *sock, poll_table *wait)
1534 {
1535 struct sock *sk = sock->sk;
1536 struct packet_opt *po = sk->protinfo.af_packet;
1537 unsigned int mask = datagram_poll(file, sock, wait);
1538
1539 spin_lock_bh(&sk->receive_queue.lock);
1540 if (po->iovec) {
1541 unsigned last = po->head ? po->head-1 : po->iovmax;
1542
1543 if (po->iovec[last]->tp_status)
1544 mask |= POLLIN | POLLRDNORM;
1545 }
1546 spin_unlock_bh(&sk->receive_queue.lock);
1547 return mask;
1548 }
1549
1550
/* Dirty? Well, I still have not found a better way to account
 * for user mmaps.
 */
1554
1555 static void packet_mm_open(struct vm_area_struct *vma)
1556 {
1557 struct file *file = vma->vm_file;
1558 struct inode *inode = file->f_dentry->d_inode;
1559 struct socket * sock = &inode->u.socket_i;
1560 struct sock *sk = sock->sk;
1561
1562 if (sk)
1563 atomic_inc(&sk->protinfo.af_packet->mapped);
1564 }
1565
1566 static void packet_mm_close(struct vm_area_struct *vma)
1567 {
1568 struct file *file = vma->vm_file;
1569 struct inode *inode = file->f_dentry->d_inode;
1570 struct socket * sock = &inode->u.socket_i;
1571 struct sock *sk = sock->sk;
1572
1573 if (sk)
1574 atomic_dec(&sk->protinfo.af_packet->mapped);
1575 }
1576
1577 static struct vm_operations_struct packet_mmap_ops = {
1578 open: packet_mm_open,
1579 close: packet_mm_close,
1580 };
1581
1582 static void free_pg_vec(unsigned long *pg_vec, unsigned order, unsigned len)
1583 {
1584 int i;
1585
1586 for (i=0; i<len; i++) {
1587 if (pg_vec[i]) {
1588 struct page *page, *pend;
1589
1590 pend = virt_to_page(pg_vec[i] + (PAGE_SIZE << order) - 1);
1591 for (page = virt_to_page(pg_vec[i]); page <= pend; page++)
1592 ClearPageReserved(page);
1593 free_pages(pg_vec[i], order);
1594 }
1595 }
1596 kfree(pg_vec);
1597 }
1598
1599
1600 static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing)
1601 {
1602 unsigned long *pg_vec = NULL;
1603 struct tpacket_hdr **io_vec = NULL;
1604 struct packet_opt *po = sk->protinfo.af_packet;
1605 int order = 0;
1606 int err = 0;
1607
1608 if (req->tp_block_nr) {
1609 int i, l;
1610 int frames_per_block;
1611
1612 /* Sanity tests and some calculations */
1613 if ((int)req->tp_block_size <= 0)
1614 return -EINVAL;
1615 if (req->tp_block_size&(PAGE_SIZE-1))
1616 return -EINVAL;
1617 if (req->tp_frame_size < TPACKET_HDRLEN)
1618 return -EINVAL;
1619 if (req->tp_frame_size&(TPACKET_ALIGNMENT-1))
1620 return -EINVAL;
1621 frames_per_block = req->tp_block_size/req->tp_frame_size;
1622 if (frames_per_block <= 0)
1623 return -EINVAL;
1624 if (frames_per_block*req->tp_block_nr != req->tp_frame_nr)
1625 return -EINVAL;
1626 /* OK! */
1627
1628 /* Allocate page vector */
1629 while ((PAGE_SIZE<<order) < req->tp_block_size)
1630 order++;
1631
1632 err = -ENOMEM;
1633
1634 pg_vec = kmalloc(req->tp_block_nr*sizeof(unsigned long*), GFP_KERNEL);
1635 if (pg_vec == NULL)
1636 goto out;
1637 memset(pg_vec, 0, req->tp_block_nr*sizeof(unsigned long*));
1638
1639 for (i=0; i<req->tp_block_nr; i++) {
1640 struct page *page, *pend;
1641 pg_vec[i] = __get_free_pages(GFP_KERNEL, order);
1642 if (!pg_vec[i])
1643 goto out_free_pgvec;
1644
1645 pend = virt_to_page(pg_vec[i] + (PAGE_SIZE << order) - 1);
1646 for (page = virt_to_page(pg_vec[i]); page <= pend; page++)
1647 SetPageReserved(page);
1648 }
1649 /* Page vector is allocated */
1650
1651 /* Draw frames */
1652 io_vec = kmalloc(req->tp_frame_nr*sizeof(struct tpacket_hdr*), GFP_KERNEL);
1653 if (io_vec == NULL)
1654 goto out_free_pgvec;
1655 memset(io_vec, 0, req->tp_frame_nr*sizeof(struct tpacket_hdr*));
1656
1657 l = 0;
1658 for (i=0; i<req->tp_block_nr; i++) {
1659 unsigned long ptr = pg_vec[i];
1660 int k;
1661
1662 for (k=0; k<frames_per_block; k++, l++) {
1663 io_vec[l] = (struct tpacket_hdr*)ptr;
1664 io_vec[l]->tp_status = TP_STATUS_KERNEL;
1665 ptr += req->tp_frame_size;
1666 }
1667 }
1668 /* Done */
1669 } else {
1670 if (req->tp_frame_nr)
1671 return -EINVAL;
1672 }
1673
1674 lock_sock(sk);
1675
1676 /* Detach socket from network */
1677 spin_lock(&po->bind_lock);
1678 if (po->running)
1679 dev_remove_pack(&po->prot_hook);
1680 spin_unlock(&po->bind_lock);
1681
1682 err = -EBUSY;
1683 if (closing || atomic_read(&po->mapped) == 0) {
1684 err = 0;
1685 #define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })
1686
1687 spin_lock_bh(&sk->receive_queue.lock);
1688 pg_vec = XC(po->pg_vec, pg_vec);
1689 io_vec = XC(po->iovec, io_vec);
1690 po->iovmax = req->tp_frame_nr-1;
1691 po->head = 0;
1692 po->frame_size = req->tp_frame_size;
1693 spin_unlock_bh(&sk->receive_queue.lock);
1694
1695 order = XC(po->pg_vec_order, order);
1696 req->tp_block_nr = XC(po->pg_vec_len, req->tp_block_nr);
1697
1698 po->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
1699 po->prot_hook.func = po->iovec ? tpacket_rcv : packet_rcv;
1700 skb_queue_purge(&sk->receive_queue);
1701 #undef XC
1702 if (atomic_read(&po->mapped))
1703 printk(KERN_DEBUG "packet_mmap: vma is busy: %d\n", atomic_read(&po->mapped));
1704 }
1705
1706 spin_lock(&po->bind_lock);
1707 if (po->running)
1708 dev_add_pack(&po->prot_hook);
1709 spin_unlock(&po->bind_lock);
1710
1711 release_sock(sk);
1712
1713 if (io_vec)
1714 kfree(io_vec);
1715
1716 out_free_pgvec:
1717 if (pg_vec)
1718 free_pg_vec(pg_vec, order, req->tp_block_nr);
1719 out:
1720 return err;
1721 }
1722
1723 static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
1724 {
1725 struct sock *sk = sock->sk;
1726 struct packet_opt *po = sk->protinfo.af_packet;
1727 unsigned long size;
1728 unsigned long start;
1729 int err = -EINVAL;
1730 int i;
1731
1732 if (vma->vm_pgoff)
1733 return -EINVAL;
1734
1735 size = vma->vm_end - vma->vm_start;
1736
1737 lock_sock(sk);
1738 if (po->pg_vec == NULL)
1739 goto out;
1740 if (size != po->pg_vec_len*po->pg_vec_pages*PAGE_SIZE)
1741 goto out;
1742
1743 atomic_inc(&po->mapped);
1744 start = vma->vm_start;
1745 err = -EAGAIN;
1746 for (i=0; i<po->pg_vec_len; i++) {
1747 if (remap_page_range(start, __pa(po->pg_vec[i]),
1748 po->pg_vec_pages*PAGE_SIZE,
1749 vma->vm_page_prot))
1750 goto out;
1751 start += po->pg_vec_pages*PAGE_SIZE;
1752 }
1753 vma->vm_ops = &packet_mmap_ops;
1754 err = 0;
1755
1756 out:
1757 release_sock(sk);
1758 return err;
1759 }
1760 #endif
1761
1762
1763 #ifdef CONFIG_SOCK_PACKET
1764 struct proto_ops packet_ops_spkt = {
1765 family: PF_PACKET,
1766
1767 release: packet_release,
1768 bind: packet_bind_spkt,
1769 connect: sock_no_connect,
1770 socketpair: sock_no_socketpair,
1771 accept: sock_no_accept,
1772 getname: packet_getname_spkt,
1773 poll: datagram_poll,
1774 ioctl: packet_ioctl,
1775 listen: sock_no_listen,
1776 shutdown: sock_no_shutdown,
1777 setsockopt: sock_no_setsockopt,
1778 getsockopt: sock_no_getsockopt,
1779 sendmsg: packet_sendmsg_spkt,
1780 recvmsg: packet_recvmsg,
1781 mmap: sock_no_mmap,
1782 };
1783 #endif
1784
1785 struct proto_ops packet_ops = {
1786 family: PF_PACKET,
1787
1788 release: packet_release,
1789 bind: packet_bind,
1790 connect: sock_no_connect,
1791 socketpair: sock_no_socketpair,
1792 accept: sock_no_accept,
1793 getname: packet_getname,
1794 poll: packet_poll,
1795 ioctl: packet_ioctl,
1796 listen: sock_no_listen,
1797 shutdown: sock_no_shutdown,
1798 setsockopt: packet_setsockopt,
1799 getsockopt: packet_getsockopt,
1800 sendmsg: packet_sendmsg,
1801 recvmsg: packet_recvmsg,
1802 mmap: packet_mmap,
1803 };
1804
1805 static struct net_proto_family packet_family_ops = {
1806 PF_PACKET,
1807 packet_create
1808 };
1809
1810 struct notifier_block packet_netdev_notifier={
1811 packet_notifier,
1812 NULL,
1813 0
1814 };
1815
1816 #ifdef CONFIG_PROC_FS
1817 static int packet_read_proc(char *buffer, char **start, off_t offset,
1818 int length, int *eof, void *data)
1819 {
1820 off_t pos=0;
1821 off_t begin=0;
1822 int len=0;
1823 struct sock *s;
1824
1825 len+= sprintf(buffer,"sk RefCnt Type Proto Iface R Rmem User Inode\n");
1826
1827 read_lock(&packet_sklist_lock);
1828
1829 for (s = packet_sklist; s; s = s->next) {
1830 len+=sprintf(buffer+len,"%p %-6d %-4d %04x %-5d %1d %-6u %-6u %-6lu",
1831 s,
1832 atomic_read(&s->refcnt),
1833 s->type,
1834 ntohs(s->num),
1835 s->protinfo.af_packet->ifindex,
1836 s->protinfo.af_packet->running,
1837 atomic_read(&s->rmem_alloc),
1838 sock_i_uid(s),
1839 sock_i_ino(s)
1840 );
1841
1842 buffer[len++]='\n';
1843
1844 pos=begin+len;
1845 if(pos<offset) {
1846 len=0;
1847 begin=pos;
1848 }
1849 if(pos>offset+length)
1850 goto done;
1851 }
1852 *eof = 1;
1853
1854 done:
1855 read_unlock(&packet_sklist_lock);
1856 *start=buffer+(offset-begin);
1857 len-=(offset-begin);
1858 if(len>length)
1859 len=length;
1860 if(len<0)
1861 len=0;
1862 return len;
1863 }
1864 #endif
1865
1866
1867
1868 static void __exit packet_exit(void)
1869 {
1870 #ifdef CONFIG_PROC_FS
1871 remove_proc_entry("net/packet", 0);
1872 #endif
1873 unregister_netdevice_notifier(&packet_netdev_notifier);
1874 sock_unregister(PF_PACKET);
1875 return;
1876 }
1877
1878
1879 static int __init packet_init(void)
1880 {
1881 sock_register(&packet_family_ops);
1882 register_netdevice_notifier(&packet_netdev_notifier);
1883 #ifdef CONFIG_PROC_FS
1884 create_proc_read_entry("net/packet", 0, 0, packet_read_proc, NULL);
1885 #endif
1886 return 0;
1887 }
1888
1889
1890 module_init(packet_init);
1891 module_exit(packet_exit);
1892
/*
 * This page was automatically generated by the LXR engine.
 * Visit the LXR main site for more information.
 */