6 * Kazunori MIYAZAWA @USAGI
7 * Kunihiro Ishiguro <kunihiro@ipinfusion.com>
9 * YOSHIFUJI Hideaki @USAGI
10 * Split up af-specific functions
11 * Derek Atkins <derek@ihtfp.com>
12 * Add UDP Encapsulation
16 #include <linux/workqueue.h>
18 #include <linux/pfkeyv2.h>
19 #include <linux/ipsec.h>
20 #include <linux/module.h>
21 #include <linux/bootmem.h>
22 #include <linux/vmalloc.h>
23 #include <linux/cache.h>
24 #include <asm/uaccess.h>
27 EXPORT_SYMBOL(xfrm_nl);
/*
 * NOTE(review): this file is a fragmentary numbered listing of the kernel's
 * net/xfrm/xfrm_state.c; interior lines are missing throughout, so code is
 * annotated in place but left byte-identical.
 */
/* Tunables for async SA event (aevent) batching: etime is the notification
 * timeout, rseqth the replay-sequence delta threshold.  Exported so the
 * sysctl/netlink layers can reference them. */
29 u32 sysctl_xfrm_aevent_etime = XFRM_AE_ETIME;
30 EXPORT_SYMBOL(sysctl_xfrm_aevent_etime);
32 u32 sysctl_xfrm_aevent_rseqth = XFRM_AE_SEQT_SIZE;
33 EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);
35 /* Each xfrm_state may be linked to two tables:
37    1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
38    2. Hash table by daddr to find what SAs exist for given
39       destination/tunnel endpoint. (output)
/* Protects the three hash tables and the counters below. */
42 static DEFINE_SPINLOCK(xfrm_state_lock);
44 /* Hash table to find appropriate SA towards given target (endpoint
45 * of tunnel or destination of transport mode) allowed by selector.
47 * Main use is finding SA after policy selected tunnel or transport mode.
48 * Also, it can be used by ah/esp icmp error handler to find offending SA.
50 static struct hlist_head *xfrm_state_bydst __read_mostly;
51 static struct hlist_head *xfrm_state_bysrc __read_mostly;
52 static struct hlist_head *xfrm_state_byspi __read_mostly;
/* hmask = (table size - 1) for power-of-two tables; hashmax caps growth. */
53 static unsigned int xfrm_state_hmask __read_mostly;
54 static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
55 static unsigned int xfrm_state_num;
56 static unsigned int xfrm_state_genid;
/*
 * Hash-function family for the bydst/bysrc/byspi tables.  The __xfrm4_* /
 * __xfrm6_* variants hash a single address family; the __xfrm_* dispatchers
 * switch on 'family' (branch lines missing from this listing); the unprefixed
 * wrappers bind the current global xfrm_state_hmask.
 */
/* IPv4 daddr hash: folds high/low halves, masks to table size
 * (initial mix of addr->a4 not visible in this listing). */
58 static inline unsigned int __xfrm4_dst_hash(xfrm_address_t *addr, unsigned int hmask)
62 h = (h ^ (h>>16)) & hmask;
/* IPv6 daddr hash: only the low 64 bits (a6[2]^a6[3]) feed the hash. */
66 static inline unsigned int __xfrm6_dst_hash(xfrm_address_t *addr, unsigned int hmask)
69 h = ntohl(addr->a6[2]^addr->a6[3]);
70 h = (h ^ (h>>16)) & hmask;
/* Source hashes reuse the daddr hash functions on the saddr argument. */
74 static inline unsigned int __xfrm4_src_hash(xfrm_address_t *addr, unsigned int hmask)
76 return __xfrm4_dst_hash(addr, hmask);
79 static inline unsigned int __xfrm6_src_hash(xfrm_address_t *addr, unsigned int hmask)
81 return __xfrm6_dst_hash(addr, hmask);
/* Family dispatch (AF_INET/AF_INET6 case lines missing from listing). */
84 static inline unsigned __xfrm_src_hash(xfrm_address_t *addr, unsigned short family, unsigned int hmask)
88 return __xfrm4_src_hash(addr, hmask);
90 return __xfrm6_src_hash(addr, hmask);
95 static inline unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family)
97 return __xfrm_src_hash(addr, family, xfrm_state_hmask);
100 static inline unsigned int __xfrm_dst_hash(xfrm_address_t *addr, unsigned short family, unsigned int hmask)
104 return __xfrm4_dst_hash(addr, hmask);
106 return __xfrm6_dst_hash(addr, hmask);
111 static inline unsigned int xfrm_dst_hash(xfrm_address_t *addr, unsigned short family)
113 return __xfrm_dst_hash(addr, family, xfrm_state_hmask);
/* SPI hashes mix daddr, SPI and protocol so distinct SAs to the same
 * endpoint land in different buckets. */
116 static inline unsigned int __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto,
120 h = ntohl(addr->a4^spi^proto);
121 h = (h ^ (h>>10) ^ (h>>20)) & hmask;
125 static inline unsigned int __xfrm6_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto,
129 h = ntohl(addr->a6[2]^addr->a6[3]^spi^proto);
130 h = (h ^ (h>>10) ^ (h>>20)) & hmask;
/* Non-static: also used by other compilation units (no 'static' here). */
135 unsigned __xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family,
140 return __xfrm4_spi_hash(addr, spi, proto, hmask);
142 return __xfrm6_spi_hash(addr, spi, proto, hmask);
147 static inline unsigned int
148 xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family)
150 return __xfrm_spi_hash(addr, spi, proto, family, xfrm_state_hmask);
/*
 * Allocate a hash table of 'sz' bytes.  The listing shows three strategies
 * (kmalloc / __vmalloc / __get_free_pages); the size-threshold conditions
 * selecting between them are missing from this listing — presumably small
 * sizes use kmalloc and large ones fall back. TODO confirm against full source.
 */
153 static struct hlist_head *xfrm_state_hash_alloc(unsigned int sz)
155 struct hlist_head *n;
158 n = kmalloc(sz, GFP_KERNEL);
160 n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL);
162 n = (struct hlist_head *)
163 __get_free_pages(GFP_KERNEL, get_order(sz));
/* Free a table allocated by xfrm_state_hash_alloc; only the page-allocator
 * branch is visible here (kfree/vfree branches missing from listing). */
171 static void xfrm_state_hash_free(struct hlist_head *n, unsigned int sz)
178 free_pages((unsigned long)n, get_order(sz));
/*
 * Re-hash every state on 'list' into the three new tables using the new
 * mask.  _safe iteration is required because hlist_add_head unlinks each
 * entry from the old chain as it moves.
 */
181 static void xfrm_hash_transfer(struct hlist_head *list,
182 struct hlist_head *ndsttable,
183 struct hlist_head *nsrctable,
184 struct hlist_head *nspitable,
185 unsigned int nhashmask)
187 struct hlist_node *entry, *tmp;
188 struct xfrm_state *x;
190 hlist_for_each_entry_safe(x, entry, tmp, list, bydst) {
193 h = __xfrm_dst_hash(&x->id.daddr, x->props.family, nhashmask);
194 hlist_add_head(&x->bydst, ndsttable+h);
196 h = __xfrm_src_hash(&x->props.saddr, x->props.family,
198 hlist_add_head(&x->bysrc, nsrctable+h);
200 h = __xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
201 x->props.family, nhashmask);
202 hlist_add_head(&x->byspi, nspitable+h);
/* Next table size in bytes: double the current bucket count. */
206 static unsigned long xfrm_hash_new_size(void)
208 return ((xfrm_state_hmask + 1) << 1) *
209 sizeof(struct hlist_head);
/* Serializes concurrent resize work items (workqueue context may re-enter). */
212 static DEFINE_MUTEX(hash_resize_mutex);
/*
 * Workqueue handler: grow all three state hash tables.  Allocates the new
 * tables outside the lock, transfers entries under xfrm_state_lock, then
 * frees the old tables.  Partial-allocation failures unwind by freeing
 * what was already allocated (cleanup jump targets missing from listing).
 */
214 static void xfrm_hash_resize(void *__unused)
216 struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
217 unsigned long nsize, osize;
218 unsigned int nhashmask, ohashmask;
221 mutex_lock(&hash_resize_mutex);
223 nsize = xfrm_hash_new_size();
224 ndst = xfrm_state_hash_alloc(nsize);
227 nsrc = xfrm_state_hash_alloc(nsize);
229 xfrm_state_hash_free(ndst, nsize);
232 nspi = xfrm_state_hash_alloc(nsize);
234 xfrm_state_hash_free(ndst, nsize);
235 xfrm_state_hash_free(nsrc, nsize);
/* _bh: the tables are also touched from softirq context. */
239 spin_lock_bh(&xfrm_state_lock);
241 nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
242 for (i = xfrm_state_hmask; i >= 0; i--)
243 xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi,
246 odst = xfrm_state_bydst;
247 osrc = xfrm_state_bysrc;
248 ospi = xfrm_state_byspi;
249 ohashmask = xfrm_state_hmask;
251 xfrm_state_bydst = ndst;
252 xfrm_state_bysrc = nsrc;
253 xfrm_state_byspi = nspi;
254 xfrm_state_hmask = nhashmask;
256 spin_unlock_bh(&xfrm_state_lock);
/* Old tables freed after publication of the new ones. */
258 osize = (ohashmask + 1) * sizeof(struct hlist_head);
259 xfrm_state_hash_free(odst, osize);
260 xfrm_state_hash_free(osrc, osize);
261 xfrm_state_hash_free(ospi, osize);
264 mutex_unlock(&hash_resize_mutex);
/* Deferred resize work item bound to xfrm_hash_resize above. */
267 static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL);
/* Waiters (e.g. SA acquisition) blocked on key-manager activity. */
269 DECLARE_WAIT_QUEUE_HEAD(km_waitq);
270 EXPORT_SYMBOL(km_waitq);
/* Per-address-family state ops, indexed by family; rwlock guards the array. */
272 static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
273 static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
/* Garbage-collection machinery: dead states are queued on gc_list and
 * destroyed from process context via gc_work. */
275 static struct work_struct xfrm_state_gc_work;
276 static HLIST_HEAD(xfrm_state_gc_list);
277 static DEFINE_SPINLOCK(xfrm_state_gc_lock);
279 static int xfrm_state_gc_flush_bundles;
/* Forward declarations used before their definitions below. */
281 int __xfrm_state_delete(struct xfrm_state *x);
283 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
284 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
286 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
287 void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
/*
 * Final teardown of one dead xfrm_state: stop its timers, release mode/type
 * references, run the type destructor and free security state.  (Reference
 * drops between the visible lines are missing from this listing.)
 */
289 static void xfrm_state_gc_destroy(struct xfrm_state *x)
291 if (del_timer(&x->timer))
293 if (del_timer(&x->rtimer))
301 xfrm_put_mode(x->mode);
303 x->type->destructor(x);
304 xfrm_put_type(x->type);
306 security_xfrm_state_free(x);
/*
 * Workqueue GC task: optionally flush stale dst bundles, then atomically
 * steal the whole gc_list under the gc lock and destroy each entry outside
 * the lock.  The bydst hlist node is reused as the gc-list linkage.
 */
310 static void xfrm_state_gc_task(void *data)
312 struct xfrm_state *x;
313 struct hlist_node *entry, *tmp;
314 struct hlist_head gc_list;
316 if (xfrm_state_gc_flush_bundles) {
317 xfrm_state_gc_flush_bundles = 0;
318 xfrm_flush_bundles();
321 spin_lock_bh(&xfrm_state_gc_lock);
322 gc_list.first = xfrm_state_gc_list.first;
323 INIT_HLIST_HEAD(&xfrm_state_gc_list);
324 spin_unlock_bh(&xfrm_state_gc_lock);
326 hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst)
327 xfrm_state_gc_destroy(x);
/* Convert seconds to jiffies, clamped so mod_timer never overflows
 * MAX_SCHEDULE_TIMEOUT. */
332 static inline unsigned long make_jiffies(long secs)
334 if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
335 return MAX_SCHEDULE_TIMEOUT-1;
/*
 * Per-state lifetime timer.  Computes the nearest hard/soft add/use expiry
 * deadline; soft expiry notifies the key manager (km_state_expired hard=0),
 * hard expiry deletes the state.  Control-flow between the visible lines
 * (gotos, rearm path) is missing from this listing — do not infer ordering.
 */
340 static void xfrm_timer_handler(unsigned long data)
342 struct xfrm_state *x = (struct xfrm_state*)data;
343 unsigned long now = (unsigned long)xtime.tv_sec;
344 long next = LONG_MAX;
348 if (x->km.state == XFRM_STATE_DEAD)
350 if (x->km.state == XFRM_STATE_EXPIRED)
352 if (x->lft.hard_add_expires_seconds) {
353 long tmo = x->lft.hard_add_expires_seconds +
354 x->curlft.add_time - now;
360 if (x->lft.hard_use_expires_seconds) {
361 long tmo = x->lft.hard_use_expires_seconds +
/* '?:' — if use_time is still 0 (never used), measure from 'now'. */
362 (x->curlft.use_time ? : now) - now;
370 if (x->lft.soft_add_expires_seconds) {
371 long tmo = x->lft.soft_add_expires_seconds +
372 x->curlft.add_time - now;
378 if (x->lft.soft_use_expires_seconds) {
379 long tmo = x->lft.soft_use_expires_seconds +
380 (x->curlft.use_time ? : now) - now;
/* Soft expiry: hard=0 notification, state stays usable. */
389 km_state_expired(x, 0, 0);
391 if (next != LONG_MAX &&
392 !mod_timer(&x->timer, jiffies + make_jiffies(next)))
/* Larval (ACQ) states with no SPI expire silently; otherwise hard-expire. */
397 if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
398 x->km.state = XFRM_STATE_EXPIRED;
403 if (!__xfrm_state_delete(x) && x->id.spi)
404 km_state_expired(x, 1, 0);
407 spin_unlock(&x->lock);
411 static void xfrm_replay_timer_handler(unsigned long data);
/*
 * Allocate and initialize a new xfrm_state with one reference held by the
 * caller, both timers armed-but-idle, and infinite soft/hard byte/packet
 * limits.  GFP_ATOMIC: may be called from softirq context.
 */
413 struct xfrm_state *xfrm_state_alloc(void)
415 struct xfrm_state *x;
417 x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
420 atomic_set(&x->refcnt, 1);
421 atomic_set(&x->tunnel_users, 0);
422 INIT_HLIST_NODE(&x->bydst);
423 INIT_HLIST_NODE(&x->bysrc);
424 INIT_HLIST_NODE(&x->byspi);
425 init_timer(&x->timer);
426 x->timer.function = xfrm_timer_handler;
427 x->timer.data = (unsigned long)x;
428 init_timer(&x->rtimer);
429 x->rtimer.function = xfrm_replay_timer_handler;
430 x->rtimer.data = (unsigned long)x;
431 x->curlft.add_time = (unsigned long)xtime.tv_sec;
/* XFRM_INF == "no limit"; real limits are installed by the key manager. */
432 x->lft.soft_byte_limit = XFRM_INF;
433 x->lft.soft_packet_limit = XFRM_INF;
434 x->lft.hard_byte_limit = XFRM_INF;
435 x->lft.hard_packet_limit = XFRM_INF;
436 x->replay_maxage = 0;
437 x->replay_maxdiff = 0;
438 spin_lock_init(&x->lock);
442 EXPORT_SYMBOL(xfrm_state_alloc);
/*
 * Queue a dead state for garbage collection.  Must only be called once the
 * state is DEAD (asserted) and unhashed; the bydst node is reused as the
 * gc-list link, and the actual teardown happens in xfrm_state_gc_task.
 */
444 void __xfrm_state_destroy(struct xfrm_state *x)
446 BUG_TRAP(x->km.state == XFRM_STATE_DEAD);
448 spin_lock_bh(&xfrm_state_gc_lock);
449 hlist_add_head(&x->bydst, &xfrm_state_gc_list);
450 spin_unlock_bh(&xfrm_state_gc_lock);
451 schedule_work(&xfrm_state_gc_work);
453 EXPORT_SYMBOL(__xfrm_state_destroy);
/*
 * Mark a state DEAD and unhash it from all three tables.  Caller must hold
 * x->lock (see xfrm_state_delete).  If extra references indicate attached
 * dst bundles, schedule a bundle flush via the GC work; finally drop the
 * allocation reference.  (Return-value lines missing from this listing.)
 */
455 int __xfrm_state_delete(struct xfrm_state *x)
459 if (x->km.state != XFRM_STATE_DEAD) {
460 x->km.state = XFRM_STATE_DEAD;
461 spin_lock(&xfrm_state_lock);
462 hlist_del(&x->bydst);
464 hlist_del(&x->bysrc);
467 hlist_del(&x->byspi);
471 spin_unlock(&xfrm_state_lock);
472 if (del_timer(&x->timer))
474 if (del_timer(&x->rtimer))
477 /* The number two in this test is the reference
478 * mentioned in the comment below plus the reference
479 * our caller holds. A larger value means that
480 * there are DSTs attached to this xfrm_state.
482 if (atomic_read(&x->refcnt) > 2) {
483 xfrm_state_gc_flush_bundles = 1;
484 schedule_work(&xfrm_state_gc_work);
487 /* All xfrm_state objects are created by xfrm_state_alloc.
488 * The xfrm_state_alloc call gives a reference, and that
489 * is what we are dropping here.
497 EXPORT_SYMBOL(__xfrm_state_delete);
/* Locked wrapper around __xfrm_state_delete: takes x->lock (bottom halves
 * disabled) and returns the inner result. */
499 int xfrm_state_delete(struct xfrm_state *x)
503 spin_lock_bh(&x->lock);
504 err = __xfrm_state_delete(x);
505 spin_unlock_bh(&x->lock);
509 EXPORT_SYMBOL(xfrm_state_delete);
/*
 * Delete every non-kernel-owned state whose protocol matches 'proto'.
 * The table lock is dropped around each xfrm_state_delete (which takes
 * x->lock) and re-taken, so the walk restarts per bucket.
 * NOTE(review): the loop bound uses '<' while other walkers in this file
 * use '<= xfrm_state_hmask' — looks like an off-by-one skipping the last
 * bucket; verify against the full source before relying on this.
 */
511 void xfrm_state_flush(u8 proto)
515 spin_lock_bh(&xfrm_state_lock);
516 for (i = 0; i < xfrm_state_hmask; i++) {
517 struct hlist_node *entry;
518 struct xfrm_state *x;
520 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
521 if (!xfrm_state_kern(x) &&
522 xfrm_id_proto_match(x->id.proto, proto)) {
524 spin_unlock_bh(&xfrm_state_lock);
526 xfrm_state_delete(x);
529 spin_lock_bh(&xfrm_state_lock);
534 spin_unlock_bh(&xfrm_state_lock);
537 EXPORT_SYMBOL(xfrm_state_flush);
/*
 * Initialize a temporary selector on a larval state via the per-family
 * afinfo hook (return type and afinfo NULL-check missing from listing).
 */
540 xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
541 struct xfrm_tmpl *tmpl,
542 xfrm_address_t *daddr, xfrm_address_t *saddr,
543 unsigned short family)
545 struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
548 afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
549 xfrm_state_put_afinfo(afinfo);
/*
 * Find a state by (daddr, SPI, proto) in the byspi table.  Caller must
 * hold xfrm_state_lock.  Per-family address comparison; the refcount bump
 * and return on match are missing from this listing.
 */
553 static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family)
555 unsigned int h = xfrm_spi_hash(daddr, spi, proto, family);
556 struct xfrm_state *x;
557 struct hlist_node *entry;
559 hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) {
560 if (x->props.family != family ||
562 x->id.proto != proto)
567 if (x->id.daddr.a4 != daddr->a4)
571 if (!ipv6_addr_equal((struct in6_addr *)daddr,
/*
 * Find a state by (daddr, saddr, proto) in the bysrc table — used for
 * SPI-less protocols.  Caller must hold xfrm_state_lock.
 */
585 static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
587 unsigned int h = xfrm_src_hash(saddr, family);
588 struct xfrm_state *x;
589 struct hlist_node *entry;
591 hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
592 if (x->props.family != family ||
593 x->id.proto != proto)
598 if (x->id.daddr.a4 != daddr->a4 ||
599 x->props.saddr.a4 != saddr->a4)
603 if (!ipv6_addr_equal((struct in6_addr *)daddr,
606 !ipv6_addr_equal((struct in6_addr *)saddr,
/* Locate an existing state matching 'x': by SPI if the protocol uses one,
 * otherwise by address pair. */
620 static inline struct xfrm_state *
621 __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
624 return __xfrm_state_lookup(&x->id.daddr, x->id.spi,
625 x->id.proto, family);
627 return __xfrm_state_lookup_byaddr(&x->id.daddr,
629 x->id.proto, family);
/*
 * Core output-path SA resolution: find the best VALID state matching the
 * template/policy/flow, or create a larval (ACQ) state and ask the key
 * manager to negotiate one.  Large portions of the control flow (best/err
 * bookkeeping, exit paths) are missing from this listing — the annotations
 * below describe only what is visible.
 */
633 xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
634 struct flowi *fl, struct xfrm_tmpl *tmpl,
635 struct xfrm_policy *pol, int *err,
636 unsigned short family)
638 unsigned int h = xfrm_dst_hash(daddr, family);
639 struct hlist_node *entry;
640 struct xfrm_state *x, *x0;
641 int acquire_in_progress = 0;
643 struct xfrm_state *best = NULL;
645 spin_lock_bh(&xfrm_state_lock);
/* Candidate filter: family, reqid, non-wildcard, addresses, mode, proto,
 * and SPI (template SPI of 0 matches any). */
646 hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
647 if (x->props.family == family &&
648 x->props.reqid == tmpl->reqid &&
649 !(x->props.flags & XFRM_STATE_WILDRECV) &&
650 xfrm_state_addr_check(x, daddr, saddr, family) &&
651 tmpl->mode == x->props.mode &&
652 tmpl->id.proto == x->id.proto &&
653 (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
655 1. There is a valid state with matching selector.
657 2. Valid state with inappropriate selector. Skip.
659 Entering area of "sysdeps".
661 3. If state is not valid, selector is temporary,
662 it selects only session which triggered
663 previous resolution. Key manager will do
664 something to install a state with proper
/* VALID: prefer non-dying, then newest add_time. */
667 if (x->km.state == XFRM_STATE_VALID) {
668 if (!xfrm_selector_match(&x->sel, fl, family) ||
669 !security_xfrm_state_pol_flow_match(x, pol, fl))
672 best->km.dying > x->km.dying ||
673 (best->km.dying == x->km.dying &&
674 best->curlft.add_time < x->curlft.add_time))
676 } else if (x->km.state == XFRM_STATE_ACQ) {
677 acquire_in_progress = 1;
678 } else if (x->km.state == XFRM_STATE_ERROR ||
679 x->km.state == XFRM_STATE_EXPIRED) {
680 if (xfrm_selector_match(&x->sel, fl, family) &&
681 security_xfrm_state_pol_flow_match(x, pol, fl))
/* No usable state and no acquire pending: start one. */
688 if (!x && !error && !acquire_in_progress) {
/* A fixed-SPI template that already has a (non-matching) state by
 * SPI is a conflict (handling lines missing from listing). */
690 (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi,
691 tmpl->id.proto, family)) != NULL) {
696 x = xfrm_state_alloc();
701 /* Initialize temporary selector matching only
702 * to current session. */
703 xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
705 error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
707 x->km.state = XFRM_STATE_DEAD;
/* km_query == 0: key manager accepted; hash the larval state into
 * all three tables and arm the acquire-expiry timer. */
713 if (km_query(x, tmpl, pol) == 0) {
714 x->km.state = XFRM_STATE_ACQ;
715 hlist_add_head(&x->bydst, xfrm_state_bydst+h);
717 h = xfrm_src_hash(saddr, family);
718 hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
721 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
722 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
725 x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
727 x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
728 add_timer(&x->timer);
730 x->km.state = XFRM_STATE_DEAD;
/* -EAGAIN tells the caller a negotiation is already in flight. */
740 *err = acquire_in_progress ? -EAGAIN : error;
741 spin_unlock_bh(&xfrm_state_lock);
/*
 * Insert 'x' into the bydst/bysrc (and, for SPI protocols, byspi) tables.
 * Caller must hold xfrm_state_lock.  Bumps the generation id so cached
 * bundles are revalidated, kicks the lifetime/replay timers, and schedules
 * a table resize when the load factor warrants it.
 */
745 static void __xfrm_state_insert(struct xfrm_state *x)
747 unsigned int h = xfrm_dst_hash(&x->id.daddr, x->props.family);
749 x->genid = ++xfrm_state_genid;
751 hlist_add_head(&x->bydst, xfrm_state_bydst+h);
754 h = xfrm_src_hash(&x->props.saddr, x->props.family);
756 hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
759 if (xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY)) {
760 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
763 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
/* Fire the lifetime timer soon so expiry checks start immediately. */
767 if (!mod_timer(&x->timer, jiffies + HZ))
770 if (x->replay_maxage &&
771 !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
/* Grow the tables once the chain we inserted into is non-trivial and the
 * state count exceeds the bucket count (and we are below hashmax). */
778 if (x->bydst.next != NULL &&
779 (xfrm_state_hmask + 1) < xfrm_state_hashmax &&
780 xfrm_state_num > xfrm_state_hmask)
781 schedule_work(&xfrm_hash_work);
/* Public locked wrapper; flushes cached bundles after insertion. */
784 void xfrm_state_insert(struct xfrm_state *x)
786 spin_lock_bh(&xfrm_state_lock);
787 __xfrm_state_insert(x);
788 spin_unlock_bh(&xfrm_state_lock);
790 xfrm_flush_all_bundles();
792 EXPORT_SYMBOL(xfrm_state_insert);
794 /* xfrm_state_lock is held */
/*
 * Find an existing larval (ACQ) state matching (family, mode, reqid, proto,
 * daddr, saddr); if none and 'create' is set, allocate one with a host-only
 * temporary selector, arm the acquire-expiry timer and hash it in.
 * (The 'create' branch condition and refcount lines are missing from this
 * listing.)
 */
795 static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
797 unsigned int h = xfrm_dst_hash(daddr, family);
798 struct hlist_node *entry;
799 struct xfrm_state *x;
801 hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
802 if (x->props.reqid != reqid ||
803 x->props.mode != mode ||
804 x->props.family != family ||
805 x->km.state != XFRM_STATE_ACQ ||
811 if (x->id.daddr.a4 != daddr->a4 ||
812 x->props.saddr.a4 != saddr->a4)
816 if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6,
817 (struct in6_addr *)daddr) ||
818 !ipv6_addr_equal((struct in6_addr *)
820 (struct in6_addr *)saddr))
832 x = xfrm_state_alloc();
/* IPv4 larval setup: /32 selector pinning exactly this address pair. */
836 x->sel.daddr.a4 = daddr->a4;
837 x->sel.saddr.a4 = saddr->a4;
838 x->sel.prefixlen_d = 32;
839 x->sel.prefixlen_s = 32;
840 x->props.saddr.a4 = saddr->a4;
841 x->id.daddr.a4 = daddr->a4;
/* IPv6 larval setup: /128 selector, same idea. */
845 ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6,
846 (struct in6_addr *)daddr);
847 ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6,
848 (struct in6_addr *)saddr);
849 x->sel.prefixlen_d = 128;
850 x->sel.prefixlen_s = 128;
851 ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6,
852 (struct in6_addr *)saddr);
853 ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
854 (struct in6_addr *)daddr);
858 x->km.state = XFRM_STATE_ACQ;
860 x->props.family = family;
861 x->props.mode = mode;
862 x->props.reqid = reqid;
/* Larval states self-destruct after XFRM_ACQ_EXPIRES seconds. */
863 x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
865 x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
866 add_timer(&x->timer);
868 hlist_add_head(&x->bydst, xfrm_state_bydst+h);
869 h = xfrm_src_hash(saddr, family);
871 hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
878 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
/*
 * Add a fully-specified state.  Fails if an equivalent state already exists
 * (duplicate check via __xfrm_state_locate); otherwise replaces any matching
 * larval state: found either by the key manager's sequence number or by the
 * acquire-core lookup.  The replaced larval x1 is deleted after insertion.
 * (Error labels and return paths are missing from this listing.)
 */
880 int xfrm_state_add(struct xfrm_state *x)
882 struct xfrm_state *x1;
885 int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
887 family = x->props.family;
889 spin_lock_bh(&xfrm_state_lock);
891 x1 = __xfrm_state_locate(x, use_spi, family);
/* Prefer matching the larval state by the KM-assigned sequence number,
 * but only when its daddr actually matches ours. */
899 if (use_spi && x->km.seq) {
900 x1 = __xfrm_find_acq_byseq(x->km.seq);
901 if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
908 x1 = __find_acq_core(family, x->props.mode, x->props.reqid,
910 &x->id.daddr, &x->props.saddr, 0);
912 __xfrm_state_insert(x);
916 spin_unlock_bh(&xfrm_state_lock);
919 xfrm_flush_all_bundles();
922 xfrm_state_delete(x1);
928 EXPORT_SYMBOL(xfrm_state_add);
/*
 * Update an existing state from 'x'.  Kernel-owned states cannot be
 * updated; updating a larval (ACQ) state inserts 'x' in its place and
 * deletes the larval.  For a VALID state, copies the mutable parts
 * (encap, care-of address, selector when SPI-less, lifetimes) under
 * x1->lock and re-kicks its timer.  (Error labels and return values are
 * missing from this listing.)
 */
930 int xfrm_state_update(struct xfrm_state *x)
932 struct xfrm_state *x1;
934 int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
936 spin_lock_bh(&xfrm_state_lock);
937 x1 = __xfrm_state_locate(x, use_spi, x->props.family);
943 if (xfrm_state_kern(x1)) {
949 if (x1->km.state == XFRM_STATE_ACQ) {
950 __xfrm_state_insert(x);
956 spin_unlock_bh(&xfrm_state_lock);
962 xfrm_state_delete(x1);
968 spin_lock_bh(&x1->lock);
969 if (likely(x1->km.state == XFRM_STATE_VALID)) {
970 if (x->encap && x1->encap)
971 memcpy(x1->encap, x->encap, sizeof(*x1->encap));
972 if (x->coaddr && x1->coaddr) {
973 memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
/* Selector is only identity-bearing for SPI-less protocols. */
975 if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
976 memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
977 memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
980 if (!mod_timer(&x1->timer, jiffies + HZ))
982 if (x1->curlft.use_time)
983 xfrm_state_check_expire(x1);
987 spin_unlock_bh(&x1->lock);
993 EXPORT_SYMBOL(xfrm_state_update);
/*
 * Enforce byte/packet lifetime limits on each use: stamps first-use time,
 * hard-expires the state (timer fired immediately) when hard limits are
 * crossed, and sends a soft-expiry notification when soft limits are
 * crossed.  (Return statements and the km.dying guard on the soft branch
 * are missing from this listing.)
 */
995 int xfrm_state_check_expire(struct xfrm_state *x)
997 if (!x->curlft.use_time)
998 x->curlft.use_time = (unsigned long)xtime.tv_sec;
1000 if (x->km.state != XFRM_STATE_VALID)
1003 if (x->curlft.bytes >= x->lft.hard_byte_limit ||
1004 x->curlft.packets >= x->lft.hard_packet_limit) {
1005 x->km.state = XFRM_STATE_EXPIRED;
/* jiffies now: make the timer handler run the hard-expiry path at once. */
1006 if (!mod_timer(&x->timer, jiffies))
1012 (x->curlft.bytes >= x->lft.soft_byte_limit ||
1013 x->curlft.packets >= x->lft.soft_packet_limit)) {
1015 km_state_expired(x, 0, 0);
1019 EXPORT_SYMBOL(xfrm_state_check_expire);
/* Ensure the skb has enough headroom for this state's header plus the
 * output device's link-layer reserve; expands the head if short. */
1021 static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
1023 int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
1024 - skb_headroom(skb);
1027 return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
1029 /* Check tail too... */
/* Per-packet output check: lifetime accounting first, then headroom. */
1033 int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
1035 int err = xfrm_state_check_expire(x);
1038 err = xfrm_state_check_space(x, skb);
1042 EXPORT_SYMBOL(xfrm_state_check);
/* Locked wrapper: look up a state by (daddr, SPI, proto). */
1045 xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto,
1046 unsigned short family)
1048 struct xfrm_state *x;
1050 spin_lock_bh(&xfrm_state_lock);
1051 x = __xfrm_state_lookup(daddr, spi, proto, family);
1052 spin_unlock_bh(&xfrm_state_lock);
1055 EXPORT_SYMBOL(xfrm_state_lookup);
/* Locked wrapper: look up a state by (daddr, saddr, proto) — SPI-less. */
1058 xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
1059 u8 proto, unsigned short family)
1061 struct xfrm_state *x;
1063 spin_lock_bh(&xfrm_state_lock);
1064 x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family);
1065 spin_unlock_bh(&xfrm_state_lock);
1068 EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
/* Locked wrapper: find (or, with 'create', make) a larval ACQ state. */
1071 xfrm_find_acq(u8 mode, u32 reqid, u8 proto,
1072 xfrm_address_t *daddr, xfrm_address_t *saddr,
1073 int create, unsigned short family)
1075 struct xfrm_state *x;
1077 spin_lock_bh(&xfrm_state_lock);
1078 x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create);
1079 spin_unlock_bh(&xfrm_state_lock);
1083 EXPORT_SYMBOL(xfrm_find_acq);
1085 #ifdef CONFIG_XFRM_SUB_POLICY
/* Sort 'n' templates from src into dst via the per-family hook (optional;
 * a missing hook leaves err at its initial value, not visible here). */
1087 xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
1088 unsigned short family)
1091 struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1093 return -EAFNOSUPPORT;
1095 spin_lock_bh(&xfrm_state_lock);
1096 if (afinfo->tmpl_sort)
1097 err = afinfo->tmpl_sort(dst, src, n);
1098 spin_unlock_bh(&xfrm_state_lock);
1099 xfrm_state_put_afinfo(afinfo);
1102 EXPORT_SYMBOL(xfrm_tmpl_sort);
/* Same pattern as xfrm_tmpl_sort, for xfrm_state arrays. */
1105 xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
1106 unsigned short family)
1109 struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1111 return -EAFNOSUPPORT;
1113 spin_lock_bh(&xfrm_state_lock);
1114 if (afinfo->state_sort)
1115 err = afinfo->state_sort(dst, src, n);
1116 spin_unlock_bh(&xfrm_state_lock);
1117 xfrm_state_put_afinfo(afinfo);
1120 EXPORT_SYMBOL(xfrm_state_sort);
1123 /* Silly enough, but I'm lazy to build resolution list */
/* Linear scan of every bydst bucket for the larval (ACQ) state carrying the
 * key manager's sequence number.  Caller holds xfrm_state_lock. */
1125 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
1129 for (i = 0; i <= xfrm_state_hmask; i++) {
1130 struct hlist_node *entry;
1131 struct xfrm_state *x;
1133 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1134 if (x->km.seq == seq &&
1135 x->km.state == XFRM_STATE_ACQ) {
/* Locked wrapper for the scan above. */
1144 struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
1146 struct xfrm_state *x;
1148 spin_lock_bh(&xfrm_state_lock);
1149 x = __xfrm_find_acq_byseq(seq);
1150 spin_unlock_bh(&xfrm_state_lock);
1153 EXPORT_SYMBOL(xfrm_find_acq_byseq);
/* Hand out a fresh, never-zero acquire sequence number (static counter
 * declaration not visible in this listing). */
1155 u32 xfrm_get_acqseq(void)
1159 static DEFINE_SPINLOCK(acqseq_lock);
1161 spin_lock_bh(&acqseq_lock);
/* '?:' skips 0 on wrap-around so 0 stays the "no seq" sentinel. */
1162 res = (++acqseq ? : ++acqseq);
1163 spin_unlock_bh(&acqseq_lock);
1166 EXPORT_SYMBOL(xfrm_get_acqseq);
/*
 * Pick an unused SPI for 'x' in [minspi, maxspi]: exact value when the
 * range is a single SPI, otherwise random probing over the range, each
 * candidate checked for collision via xfrm_state_lookup.  On success the
 * state is hashed into the byspi table.  (Success/failure returns and the
 * x0 release after a colliding lookup are missing from this listing.)
 */
1169 xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
1172 struct xfrm_state *x0;
1177 if (minspi == maxspi) {
1178 x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
/* Range search in host byte order; store back in network order. */
1186 minspi = ntohl(minspi);
1187 maxspi = ntohl(maxspi);
1188 for (h=0; h<maxspi-minspi+1; h++) {
1189 spi = minspi + net_random()%(maxspi-minspi+1);
1190 x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
1192 x->id.spi = htonl(spi);
1199 spin_lock_bh(&xfrm_state_lock);
1200 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
1201 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
1203 spin_unlock_bh(&xfrm_state_lock);
1207 EXPORT_SYMBOL(xfrm_alloc_spi);
/*
 * Invoke 'func' on every state matching 'proto'.  First pass counts the
 * matches, second pass calls func with a decrementing count so the callback
 * can tell when it sees the last entry.  Entire walk runs under
 * xfrm_state_lock (bottom halves off) — func must not sleep.
 */
1209 int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
1213 struct xfrm_state *x;
1214 struct hlist_node *entry;
1218 spin_lock_bh(&xfrm_state_lock);
1219 for (i = 0; i <= xfrm_state_hmask; i++) {
1220 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1221 if (xfrm_id_proto_match(x->id.proto, proto))
1230 for (i = 0; i <= xfrm_state_hmask; i++) {
1231 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1232 if (!xfrm_id_proto_match(x->id.proto, proto))
1234 err = func(x, --count, data);
1240 spin_unlock_bh(&xfrm_state_lock);
1243 EXPORT_SYMBOL(xfrm_state_walk);
/*
 * Emit an aevent (XFRM_MSG_NEWAE) to the key manager when replay counters
 * changed enough, or on timer expiry; snapshots the replay state into
 * preplay so "changed since last notify" can be detected, then rearms the
 * replay timer.  Caller must hold x->lock (per the original comment).
 */
1246 void xfrm_replay_notify(struct xfrm_state *x, int event)
1249 /* we send notify messages in case
1250 * 1. we updated on of the sequence numbers, and the seqno difference
1251 * is at least x->replay_maxdiff, in this case we also update the
1252 * timeout of our timer function
1253 * 2. if x->replay_maxage has elapsed since last update,
1254 * and there were changes
1256 * The state structure must be locked!
1260 case XFRM_REPLAY_UPDATE:
/* Below-threshold deltas are deferred: either downgraded to a TIMEOUT
 * event (if one was already deferred) or dropped (return path missing). */
1261 if (x->replay_maxdiff &&
1262 (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
1263 (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
1264 if (x->xflags & XFRM_TIME_DEFER)
1265 event = XFRM_REPLAY_TIMEOUT;
1272 case XFRM_REPLAY_TIMEOUT:
/* Nothing changed since last notify: defer instead of sending. */
1273 if ((x->replay.seq == x->preplay.seq) &&
1274 (x->replay.bitmap == x->preplay.bitmap) &&
1275 (x->replay.oseq == x->preplay.oseq)) {
1276 x->xflags |= XFRM_TIME_DEFER;
1283 memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
1284 c.event = XFRM_MSG_NEWAE;
1285 c.data.aevent = event;
1286 km_state_notify(x, &c);
1288 if (x->replay_maxage &&
1289 !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) {
1291 x->xflags &= ~XFRM_TIME_DEFER;
1294 EXPORT_SYMBOL(xfrm_replay_notify);
/* Replay timer: periodically flush any deferred aevent for VALID states;
 * otherwise mark that a notification is pending (XFRM_TIME_DEFER). */
1296 static void xfrm_replay_timer_handler(unsigned long data)
1298 struct xfrm_state *x = (struct xfrm_state*)data;
1300 spin_lock(&x->lock);
1302 if (x->km.state == XFRM_STATE_VALID) {
1303 if (xfrm_aevent_is_on())
1304 xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
1306 x->xflags |= XFRM_TIME_DEFER;
1309 spin_unlock(&x->lock);
/*
 * Anti-replay acceptance check for inbound seq: 0 is never valid, anything
 * above the window head is fine, within-window duplicates are rejected via
 * the bitmap.  (Accept/reject return values missing from this listing.)
 */
1313 int xfrm_replay_check(struct xfrm_state *x, u32 seq)
1319 if (unlikely(seq == 0))
1322 if (likely(seq > x->replay.seq))
1325 diff = x->replay.seq - seq;
1326 if (diff >= x->props.replay_window) {
1327 x->stats.replay_window++;
1331 if (x->replay.bitmap & (1U << diff)) {
1337 EXPORT_SYMBOL(xfrm_replay_check);
/*
 * Record an accepted seq: advance the window head (shifting the bitmap)
 * for new maxima, or set the in-window bit for older-but-valid packets,
 * then fire an aevent if enabled.
 */
1339 void xfrm_replay_advance(struct xfrm_state *x, u32 seq)
1345 if (seq > x->replay.seq) {
1346 diff = seq - x->replay.seq;
1347 if (diff < x->props.replay_window)
1348 x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
/* Jump beyond the window: only the new head bit survives. */
1350 x->replay.bitmap = 1;
1351 x->replay.seq = seq;
1353 diff = x->replay.seq - seq;
1354 x->replay.bitmap |= (1U << diff);
1357 if (xfrm_aevent_is_on())
1358 xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
1360 EXPORT_SYMBOL(xfrm_replay_advance);
/* Registered key managers (pfkey, netlink) and the rwlock guarding the list. */
1362 static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
1363 static DEFINE_RWLOCK(xfrm_km_lock);
/* Broadcast a policy event to every key manager implementing the hook. */
1365 void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
1367 struct xfrm_mgr *km;
1369 read_lock(&xfrm_km_lock);
1370 list_for_each_entry(km, &xfrm_km_list, list)
1371 if (km->notify_policy)
1372 km->notify_policy(xp, dir, c);
1373 read_unlock(&xfrm_km_lock);
/* Broadcast a state event to every key manager (per-km call line missing). */
1376 void km_state_notify(struct xfrm_state *x, struct km_event *c)
1378 struct xfrm_mgr *km;
1379 read_lock(&xfrm_km_lock);
1380 list_for_each_entry(km, &xfrm_km_list, list)
1383 read_unlock(&xfrm_km_lock);
1386 EXPORT_SYMBOL(km_policy_notify);
1387 EXPORT_SYMBOL(km_state_notify);
/* Announce soft (hard=0) or hard (hard=1) state expiry as an XFRM_MSG_EXPIRE
 * event (km_event setup lines missing from this listing). */
1389 void km_state_expired(struct xfrm_state *x, int hard, u32 pid)
1395 c.event = XFRM_MSG_EXPIRE;
1396 km_state_notify(x, &c);
1402 EXPORT_SYMBOL(km_state_expired);
1404 * We send to all registered managers regardless of failure
1405 * We are happy with one success
/* Ask all key managers to negotiate an SA for template 't'; any single
 * successful acquire() is enough (success bookkeeping lines missing). */
1407 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
1409 int err = -EINVAL, acqret;
1410 struct xfrm_mgr *km;
1412 read_lock(&xfrm_km_lock);
1413 list_for_each_entry(km, &xfrm_km_list, list) {
1414 acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
1418 read_unlock(&xfrm_km_lock);
1421 EXPORT_SYMBOL(km_query);
/* Report a NAT-T address/port mapping change to interested key managers. */
1423 int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
1426 struct xfrm_mgr *km;
1428 read_lock(&xfrm_km_lock);
1429 list_for_each_entry(km, &xfrm_km_list, list) {
1430 if (km->new_mapping)
1431 err = km->new_mapping(x, ipaddr, sport);
1435 read_unlock(&xfrm_km_lock);
1438 EXPORT_SYMBOL(km_new_mapping);
/* Announce policy expiry as an XFRM_MSG_POLEXPIRE event. */
1440 void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
1446 c.event = XFRM_MSG_POLEXPIRE;
1447 km_policy_notify(pol, dir, &c);
1452 EXPORT_SYMBOL(km_policy_expired);
/* Forward a MIGRATE/report request to key managers implementing report(). */
1454 int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
1458 struct xfrm_mgr *km;
1460 read_lock(&xfrm_km_lock);
1461 list_for_each_entry(km, &xfrm_km_list, list) {
1463 ret = km->report(proto, sel, addr);
1468 read_unlock(&xfrm_km_lock);
1471 EXPORT_SYMBOL(km_report);
/*
 * setsockopt path for per-socket IPsec policy: copy the userspace blob
 * (bounded by PAGE_SIZE), let the first key manager that can compile it
 * produce an xfrm_policy, and install it on the socket.  Cleanup/kfree
 * paths are missing from this listing.
 */
1473 int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
1477 struct xfrm_mgr *km;
1478 struct xfrm_policy *pol = NULL;
/* Reject non-positive and over-page requests before allocating. */
1480 if (optlen <= 0 || optlen > PAGE_SIZE)
1483 data = kmalloc(optlen, GFP_KERNEL);
1488 if (copy_from_user(data, optval, optlen))
1492 read_lock(&xfrm_km_lock);
1493 list_for_each_entry(km, &xfrm_km_list, list) {
1494 pol = km->compile_policy(sk, optname, data,
1499 read_unlock(&xfrm_km_lock);
1502 xfrm_sk_policy_insert(sk, err, pol);
1511 EXPORT_SYMBOL(xfrm_user_policy);
/* Register a key manager on the global list (returns 0; line missing). */
1513 int xfrm_register_km(struct xfrm_mgr *km)
1515 write_lock_bh(&xfrm_km_lock);
1516 list_add_tail(&km->list, &xfrm_km_list);
1517 write_unlock_bh(&xfrm_km_lock);
1520 EXPORT_SYMBOL(xfrm_register_km);
/* Unregister a key manager from the global list. */
1522 int xfrm_unregister_km(struct xfrm_mgr *km)
1524 write_lock_bh(&xfrm_km_lock);
1525 list_del(&km->list);
1526 write_unlock_bh(&xfrm_km_lock);
1529 EXPORT_SYMBOL(xfrm_unregister_km);
/*
 * Register the per-address-family state operations.  Rejects NULL,
 * out-of-range families and double registration (error assignments
 * between the visible lines are missing from this listing).
 */
1531 int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
1534 if (unlikely(afinfo == NULL))
1536 if (unlikely(afinfo->family >= NPROTO))
1537 return -EAFNOSUPPORT;
1538 write_lock_bh(&xfrm_state_afinfo_lock);
1539 if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
1542 xfrm_state_afinfo[afinfo->family] = afinfo;
1543 write_unlock_bh(&xfrm_state_afinfo_lock);
1546 EXPORT_SYMBOL(xfrm_state_register_afinfo);
/* Unregister: only clears the slot if it still points at this afinfo. */
1548 int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
1551 if (unlikely(afinfo == NULL))
1553 if (unlikely(afinfo->family >= NPROTO))
1554 return -EAFNOSUPPORT;
1555 write_lock_bh(&xfrm_state_afinfo_lock);
1556 if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
1557 if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
1560 xfrm_state_afinfo[afinfo->family] = NULL;
1562 write_unlock_bh(&xfrm_state_afinfo_lock);
1565 EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
/*
 * Look up afinfo for a family.  On success the read lock is intentionally
 * kept held — xfrm_state_put_afinfo releases it; on failure (missing entry)
 * the lock is dropped here.
 */
1567 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
1569 struct xfrm_state_afinfo *afinfo;
1570 if (unlikely(family >= NPROTO))
1572 read_lock(&xfrm_state_afinfo_lock);
1573 afinfo = xfrm_state_afinfo[family];
1574 if (unlikely(!afinfo))
1575 read_unlock(&xfrm_state_afinfo_lock);
/* Pairs with a successful xfrm_state_get_afinfo: releases the read lock. */
1579 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
1581 read_unlock(&xfrm_state_afinfo_lock);
1584 /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
/* Drop a reference on x's inner tunnel state; delete the tunnel SA when
 * only the tunnel linkage itself still references it (count == 2). */
1585 void xfrm_state_delete_tunnel(struct xfrm_state *x)
1588 struct xfrm_state *t = x->tunnel;
1590 if (atomic_read(&t->tunnel_users) == 2)
1591 xfrm_state_delete(t);
1592 atomic_dec(&t->tunnel_users);
1597 EXPORT_SYMBOL(xfrm_state_delete_tunnel);
1600 * This function is NOT optimal. For example, with ESP it will give an
1601 * MTU that's usually two bytes short of being optimal. However, it will
1602 * usually give an answer that's a multiple of 4 provided the input is
1603 * also a multiple of 4.
/* Usable payload MTU through this SA: subtract header overhead, then let
 * the transform type round for padding/trailer via get_max_size (the 'm'
 * setup and final return are missing from this listing). */
1605 int xfrm_state_mtu(struct xfrm_state *x, int mtu)
1609 res -= x->props.header_len;
1617 spin_lock_bh(&x->lock);
1618 if (x->km.state == XFRM_STATE_VALID &&
1619 x->type && x->type->get_max_size)
1620 m = x->type->get_max_size(x, m);
1622 m += x->props.header_len;
1623 spin_unlock_bh(&x->lock);
/*
 * Finish constructing a state: run the per-family init_flags hook, resolve
 * and initialize the transform type (AH/ESP/IPcomp...) and mode, then mark
 * the state VALID.  Error labels between the visible lines are missing
 * from this listing.
 */
1633 int xfrm_init_state(struct xfrm_state *x)
1635 struct xfrm_state_afinfo *afinfo;
1636 int family = x->props.family;
1639 err = -EAFNOSUPPORT;
1640 afinfo = xfrm_state_get_afinfo(family);
1645 if (afinfo->init_flags)
1646 err = afinfo->init_flags(x);
1648 xfrm_state_put_afinfo(afinfo);
1653 err = -EPROTONOSUPPORT;
1654 x->type = xfrm_get_type(x->id.proto, family);
1655 if (x->type == NULL)
1658 err = x->type->init_state(x);
1662 x->mode = xfrm_get_mode(x->props.mode, family);
1663 if (x->mode == NULL)
1666 x->km.state = XFRM_STATE_VALID;
1672 EXPORT_SYMBOL(xfrm_init_state);
1674 void __init xfrm_state_init(void)
1678 sz = sizeof(struct hlist_head) * 8;
1680 xfrm_state_bydst = xfrm_state_hash_alloc(sz);
1681 xfrm_state_bysrc = xfrm_state_hash_alloc(sz);
1682 xfrm_state_byspi = xfrm_state_hash_alloc(sz);
1683 if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi)
1684 panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes.");
1685 xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
1687 INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);