2 * sysctl_net_ipv4.c: sysctl interface to net IPV4 subsystem.
4 * Begun April 1, 1996, Mike Shaver.
5 * Added /proc/sys/net/ipv4 directory entry (empty =) ). [MS]
9 #include <linux/module.h>
10 #include <linux/sysctl.h>
11 #include <linux/igmp.h>
12 #include <linux/inetdevice.h>
13 #include <linux/seqlock.h>
14 #include <linux/init.h>
15 #include <linux/slab.h>
16 #include <linux/nsproxy.h>
17 #include <linux/swap.h>
21 #include <net/route.h>
24 #include <net/cipso_ipv4.h>
25 #include <net/inet_frag.h>
27 #include <net/tcp_memcontrol.h>
/*
 * Limit values referenced from the .extra1 / .extra2 fields of the sysctl
 * entries and temporary tables further down in this file.
 */
/* Upper limit attached to the tcp_retries1 entry. */
31 static int tcp_retr1_max = 255;
/* Per-element limits for the two-element ip_local_port_range vector. */
32 static int ip_local_port_range_min[] = { 1, 1 };
33 static int ip_local_port_range_max[] = { 65535, 65535 };
/* Limits attached to the tcp_adv_win_scale entry. */
34 static int tcp_adv_win_scale_min = -31;
35 static int tcp_adv_win_scale_max = 31;
/* Limits attached to the ip_default_ttl entry. */
36 static int ip_ttl_min = 1;
37 static int ip_ttl_max = 255;
/* Per-element limits for the two-element ping_group_range vector. */
38 static int ip_ping_group_range_min[] = { 0, 0 };
39 static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
41 /* Update system visible IP port range */
/*
 * Stores range[0] and range[1] into sysctl_local_ports.range. Both stores
 * happen between write_seqlock() and write_sequnlock() on
 * sysctl_local_ports.lock.
 */
42 static void set_local_port_range(int range[2])
44 write_seqlock(&sysctl_local_ports.lock);
45 sysctl_local_ports.range[0] = range[0];
46 sysctl_local_ports.range[1] = range[1];
47 write_sequnlock(&sysctl_local_ports.lock);
50 /* Validate changes from /proc interface. */
/*
 * proc handler installed on the ip_local_port_range entry. It works on a
 * local copy of the range: the copy is seeded from the live values,
 * proc_dointvec_minmax() runs against a temporary table limited by
 * ip_local_port_range_min / ip_local_port_range_max, and after a successful
 * write the copy is committed through set_local_port_range().
 */
51 static int ipv4_local_port_range(ctl_table *table, int write,
53 size_t *lenp, loff_t *ppos)
59 .maxlen = sizeof(range),
61 .extra1 = &ip_local_port_range_min,
62 .extra2 = &ip_local_port_range_max,
/* Seed the local copy with the current low/high values. */
65 inet_get_local_port_range(range, range + 1);
66 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
/* Only a write that parsed cleanly goes on to the checks below. */
68 if (write && ret == 0) {
/* Tests for an inverted range (high < low) ahead of the commit. */
69 if (range[1] < range[0])
72 set_local_port_range(range);
/*
 * Reader side for the ping group range stored behind table->data.
 * The read is wrapped in a read_seqbegin()/read_seqretry() loop, so it is
 * repeated whenever a writer ran concurrently.
 * NOTE(review): the seqlock used is sysctl_local_ports.lock, i.e. the one
 * that also guards the local port range - confirm this sharing is intended.
 * Presumably data[0] / data[1] are copied to *low / *high inside the loop.
 */
79 static void inet_get_ping_group_range_table(struct ctl_table *table, gid_t *low, gid_t *high)
81 gid_t *data = table->data;
84 seq = read_seqbegin(&sysctl_local_ports.lock);
88 } while (read_seqretry(&sysctl_local_ports.lock, seq));
91 /* Update system visible ping group range */
/*
 * Writer side for the ping group range stored behind table->data. The
 * update runs between write_seqlock() and write_sequnlock() on
 * sysctl_local_ports.lock, the same seqlock the reader
 * inet_get_ping_group_range_table() retries on.
 * Presumably range[0] / range[1] are stored into data[0] / data[1].
 */
92 static void set_ping_group_range(struct ctl_table *table, gid_t range[2])
94 gid_t *data = table->data;
95 write_seqlock(&sysctl_local_ports.lock);
98 write_sequnlock(&sysctl_local_ports.lock);
101 /* Validate changes from /proc interface. */
/*
 * proc handler installed on the ping_group_range entry. Mirrors
 * ipv4_local_port_range(): a local copy is seeded from the table's current
 * values, proc_dointvec_minmax() runs against a temporary table limited by
 * ip_ping_group_range_min / ip_ping_group_range_max, and after a successful
 * write the copy is committed through set_ping_group_range().
 * Unlike the port-range handler, no low <= high test is visible here.
 */
102 static int ipv4_ping_group_range(ctl_table *table, int write,
104 size_t *lenp, loff_t *ppos)
110 .maxlen = sizeof(range),
112 .extra1 = &ip_ping_group_range_min,
113 .extra2 = &ip_ping_group_range_max,
/* Seed the local copy with the current low/high gids. */
116 inet_get_ping_group_range_table(table, range, range + 1);
117 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
/* Commit only when this was a write and parsing succeeded. */
119 if (write && ret == 0)
120 set_ping_group_range(table, range);
/*
 * proc handler installed on the tcp_congestion_control entry. The current
 * default algorithm name is fetched into a stack buffer of TCP_CA_NAME_MAX
 * bytes, proc_dostring() serves the read or write against a temporary table
 * over that buffer, and after a successful write the buffer contents are
 * handed to tcp_set_default_congestion_control(), whose result becomes the
 * handler's result.
 */
125 static int proc_tcp_congestion_control(ctl_table *ctl, int write,
126 void __user *buffer, size_t *lenp, loff_t *ppos)
128 char val[TCP_CA_NAME_MAX];
131 .maxlen = TCP_CA_NAME_MAX,
/* Pre-load the buffer so a read reports the current default. */
135 tcp_get_default_congestion_control(val);
137 ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
138 if (write && ret == 0)
139 ret = tcp_set_default_congestion_control(val);
/*
 * proc handler installed on the tcp_available_congestion_control entry.
 * A scratch buffer of TCP_CA_BUF_MAX bytes is allocated with kmalloc(),
 * filled by tcp_get_available_congestion_control(), and then served through
 * proc_dostring() via a temporary table.
 */
143 static int proc_tcp_available_congestion_control(ctl_table *ctl,
145 void __user *buffer, size_t *lenp,
148 ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX, };
151 tbl.data = kmalloc(tbl.maxlen, GFP_USER);
154 tcp_get_available_congestion_control(tbl.data, TCP_CA_BUF_MAX);
155 ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
/*
 * proc handler installed on the tcp_allowed_congestion_control entry.
 * A scratch buffer of TCP_CA_BUF_MAX bytes is allocated with kmalloc(),
 * pre-filled by tcp_get_allowed_congestion_control(), and served through
 * proc_dostring() via a temporary table. After a successful write the
 * buffer is passed to tcp_set_allowed_congestion_control(), whose result
 * becomes the handler's result.
 */
160 static int proc_allowed_congestion_control(ctl_table *ctl,
162 void __user *buffer, size_t *lenp,
165 ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX };
168 tbl.data = kmalloc(tbl.maxlen, GFP_USER);
172 tcp_get_allowed_congestion_control(tbl.data, tbl.maxlen);
173 ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
174 if (write && ret == 0)
175 ret = tcp_set_allowed_congestion_control(tbl.data);
/*
 * proc handler installed on the tcp_mem entry. The network namespace is
 * taken from the calling task (current->nsproxy->net_ns), not from the
 * table entry.
 *
 * Two paths are visible:
 *  - one points ctl->data at the namespace's sysctl_tcp_mem and returns
 *    proc_doulongvec_minmax() directly (presumably the read path - the
 *    guarding condition should be confirmed);
 *  - the other parses three unsigned longs into a local vector through a
 *    temporary table, and then copies them into net->ipv4.sysctl_tcp_mem.
 *    With CONFIG_CGROUP_MEM_RES_CTLR_KMEM, the three values are also passed
 *    to tcp_prot_mem() for the calling task's memory cgroup.
 */
180 static int ipv4_tcp_mem(ctl_table *ctl, int write,
181 void __user *buffer, size_t *lenp,
185 unsigned long vec[3];
186 struct net *net = current->nsproxy->net_ns;
187 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
188 struct mem_cgroup *memcg;
193 .maxlen = sizeof(vec),
/* Direct path: operate on the namespace's own array. */
198 ctl->data = &net->ipv4.sysctl_tcp_mem;
199 return proc_doulongvec_minmax(ctl, write, buffer, lenp, ppos);
/* Staged path: parse into the temporary table first. */
202 ret = proc_doulongvec_minmax(&tmp, write, buffer, lenp, ppos);
206 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
/* Hand each of the three values to the caller's memory cgroup. */
208 memcg = mem_cgroup_from_task(current);
210 tcp_prot_mem(memcg, vec[0], 0);
211 tcp_prot_mem(memcg, vec[1], 1);
212 tcp_prot_mem(memcg, vec[2], 2);
/* Commit the parsed values to the namespace. */
216 net->ipv4.sysctl_tcp_mem[0] = vec[0];
217 net->ipv4.sysctl_tcp_mem[1] = vec[1];
218 net->ipv4.sysctl_tcp_mem[2] = vec[2];
/*
 * Sysctl entries registered once, for init_net, under "net/ipv4" by
 * sysctl_ipv4_init(). Each entry names a proc file (.procname), the
 * variable backing it (.data / .maxlen) and the handler that parses and
 * formats it (.proc_handler). Entries using proc_dointvec_minmax may carry
 * limits in .extra1 / .extra2.
 */
223 static struct ctl_table ipv4_table[] = {
225 .procname = "tcp_timestamps",
226 .data = &sysctl_tcp_timestamps,
227 .maxlen = sizeof(int),
229 .proc_handler = proc_dointvec
232 .procname = "tcp_window_scaling",
233 .data = &sysctl_tcp_window_scaling,
234 .maxlen = sizeof(int),
236 .proc_handler = proc_dointvec
239 .procname = "tcp_sack",
240 .data = &sysctl_tcp_sack,
241 .maxlen = sizeof(int),
243 .proc_handler = proc_dointvec
246 .procname = "tcp_retrans_collapse",
247 .data = &sysctl_tcp_retrans_collapse,
248 .maxlen = sizeof(int),
250 .proc_handler = proc_dointvec
253 .procname = "ip_default_ttl",
254 .data = &sysctl_ip_default_ttl,
255 .maxlen = sizeof(int),
257 .proc_handler = proc_dointvec_minmax,
258 .extra1 = &ip_ttl_min,
259 .extra2 = &ip_ttl_max,
262 .procname = "ip_no_pmtu_disc",
263 .data = &ipv4_config.no_pmtu_disc,
264 .maxlen = sizeof(int),
266 .proc_handler = proc_dointvec
269 .procname = "ip_nonlocal_bind",
270 .data = &sysctl_ip_nonlocal_bind,
271 .maxlen = sizeof(int),
273 .proc_handler = proc_dointvec
276 .procname = "tcp_syn_retries",
277 .data = &sysctl_tcp_syn_retries,
278 .maxlen = sizeof(int),
280 .proc_handler = proc_dointvec
283 .procname = "tcp_synack_retries",
284 .data = &sysctl_tcp_synack_retries,
285 .maxlen = sizeof(int),
287 .proc_handler = proc_dointvec
290 .procname = "tcp_max_orphans",
291 .data = &sysctl_tcp_max_orphans,
292 .maxlen = sizeof(int),
294 .proc_handler = proc_dointvec
297 .procname = "tcp_max_tw_buckets",
298 .data = &tcp_death_row.sysctl_max_tw_buckets,
299 .maxlen = sizeof(int),
301 .proc_handler = proc_dointvec
304 .procname = "ip_dynaddr",
305 .data = &sysctl_ip_dynaddr,
306 .maxlen = sizeof(int),
308 .proc_handler = proc_dointvec
311 .procname = "tcp_keepalive_time",
312 .data = &sysctl_tcp_keepalive_time,
313 .maxlen = sizeof(int),
315 .proc_handler = proc_dointvec_jiffies,
318 .procname = "tcp_keepalive_probes",
319 .data = &sysctl_tcp_keepalive_probes,
320 .maxlen = sizeof(int),
322 .proc_handler = proc_dointvec
325 .procname = "tcp_keepalive_intvl",
326 .data = &sysctl_tcp_keepalive_intvl,
327 .maxlen = sizeof(int),
329 .proc_handler = proc_dointvec_jiffies,
/* Only an upper limit (.extra2) is attached to tcp_retries1 here. */
332 .procname = "tcp_retries1",
333 .data = &sysctl_tcp_retries1,
334 .maxlen = sizeof(int),
336 .proc_handler = proc_dointvec_minmax,
337 .extra2 = &tcp_retr1_max
340 .procname = "tcp_retries2",
341 .data = &sysctl_tcp_retries2,
342 .maxlen = sizeof(int),
344 .proc_handler = proc_dointvec
347 .procname = "tcp_fin_timeout",
348 .data = &sysctl_tcp_fin_timeout,
349 .maxlen = sizeof(int),
351 .proc_handler = proc_dointvec_jiffies,
353 #ifdef CONFIG_SYN_COOKIES
355 .procname = "tcp_syncookies",
356 .data = &sysctl_tcp_syncookies,
357 .maxlen = sizeof(int),
359 .proc_handler = proc_dointvec
363 .procname = "tcp_tw_recycle",
364 .data = &tcp_death_row.sysctl_tw_recycle,
365 .maxlen = sizeof(int),
367 .proc_handler = proc_dointvec
370 .procname = "tcp_abort_on_overflow",
371 .data = &sysctl_tcp_abort_on_overflow,
372 .maxlen = sizeof(int),
374 .proc_handler = proc_dointvec
377 .procname = "tcp_stdurg",
378 .data = &sysctl_tcp_stdurg,
379 .maxlen = sizeof(int),
381 .proc_handler = proc_dointvec
384 .procname = "tcp_rfc1337",
385 .data = &sysctl_tcp_rfc1337,
386 .maxlen = sizeof(int),
388 .proc_handler = proc_dointvec
391 .procname = "tcp_max_syn_backlog",
392 .data = &sysctl_max_syn_backlog,
393 .maxlen = sizeof(int),
395 .proc_handler = proc_dointvec
/* Two-element vector; validated by the custom handler in this file. */
398 .procname = "ip_local_port_range",
399 .data = &sysctl_local_ports.range,
400 .maxlen = sizeof(sysctl_local_ports.range),
402 .proc_handler = ipv4_local_port_range,
405 .procname = "ip_local_reserved_ports",
406 .data = NULL, /* initialized in sysctl_ipv4_init */
409 .proc_handler = proc_do_large_bitmap,
412 .procname = "igmp_max_memberships",
413 .data = &sysctl_igmp_max_memberships,
414 .maxlen = sizeof(int),
416 .proc_handler = proc_dointvec
419 .procname = "igmp_max_msf",
420 .data = &sysctl_igmp_max_msf,
421 .maxlen = sizeof(int),
423 .proc_handler = proc_dointvec
426 .procname = "inet_peer_threshold",
427 .data = &inet_peer_threshold,
428 .maxlen = sizeof(int),
430 .proc_handler = proc_dointvec
433 .procname = "inet_peer_minttl",
434 .data = &inet_peer_minttl,
435 .maxlen = sizeof(int),
437 .proc_handler = proc_dointvec_jiffies,
440 .procname = "inet_peer_maxttl",
441 .data = &inet_peer_maxttl,
442 .maxlen = sizeof(int),
444 .proc_handler = proc_dointvec_jiffies,
447 .procname = "tcp_orphan_retries",
448 .data = &sysctl_tcp_orphan_retries,
449 .maxlen = sizeof(int),
451 .proc_handler = proc_dointvec
454 .procname = "tcp_fack",
455 .data = &sysctl_tcp_fack,
456 .maxlen = sizeof(int),
458 .proc_handler = proc_dointvec
461 .procname = "tcp_reordering",
462 .data = &sysctl_tcp_reordering,
463 .maxlen = sizeof(int),
465 .proc_handler = proc_dointvec
468 .procname = "tcp_ecn",
469 .data = &sysctl_tcp_ecn,
470 .maxlen = sizeof(int),
472 .proc_handler = proc_dointvec
475 .procname = "tcp_dsack",
476 .data = &sysctl_tcp_dsack,
477 .maxlen = sizeof(int),
479 .proc_handler = proc_dointvec
/* tcp_wmem / tcp_rmem are sized by the whole backing object. */
482 .procname = "tcp_wmem",
483 .data = &sysctl_tcp_wmem,
484 .maxlen = sizeof(sysctl_tcp_wmem),
486 .proc_handler = proc_dointvec
489 .procname = "tcp_rmem",
490 .data = &sysctl_tcp_rmem,
491 .maxlen = sizeof(sysctl_tcp_rmem),
493 .proc_handler = proc_dointvec
496 .procname = "tcp_app_win",
497 .data = &sysctl_tcp_app_win,
498 .maxlen = sizeof(int),
500 .proc_handler = proc_dointvec
503 .procname = "tcp_adv_win_scale",
504 .data = &sysctl_tcp_adv_win_scale,
505 .maxlen = sizeof(int),
507 .proc_handler = proc_dointvec_minmax,
508 .extra1 = &tcp_adv_win_scale_min,
509 .extra2 = &tcp_adv_win_scale_max,
512 .procname = "tcp_tw_reuse",
513 .data = &sysctl_tcp_tw_reuse,
514 .maxlen = sizeof(int),
516 .proc_handler = proc_dointvec
519 .procname = "tcp_frto",
520 .data = &sysctl_tcp_frto,
521 .maxlen = sizeof(int),
523 .proc_handler = proc_dointvec
526 .procname = "tcp_frto_response",
527 .data = &sysctl_tcp_frto_response,
528 .maxlen = sizeof(int),
530 .proc_handler = proc_dointvec
533 .procname = "tcp_low_latency",
534 .data = &sysctl_tcp_low_latency,
535 .maxlen = sizeof(int),
537 .proc_handler = proc_dointvec
540 .procname = "tcp_no_metrics_save",
541 .data = &sysctl_tcp_nometrics_save,
542 .maxlen = sizeof(int),
544 .proc_handler = proc_dointvec,
547 .procname = "tcp_moderate_rcvbuf",
548 .data = &sysctl_tcp_moderate_rcvbuf,
549 .maxlen = sizeof(int),
551 .proc_handler = proc_dointvec,
554 .procname = "tcp_tso_win_divisor",
555 .data = &sysctl_tcp_tso_win_divisor,
556 .maxlen = sizeof(int),
558 .proc_handler = proc_dointvec,
/* No .data shown: the handler works on its own buffer. */
561 .procname = "tcp_congestion_control",
563 .maxlen = TCP_CA_NAME_MAX,
564 .proc_handler = proc_tcp_congestion_control,
567 .procname = "tcp_abc",
568 .data = &sysctl_tcp_abc,
569 .maxlen = sizeof(int),
571 .proc_handler = proc_dointvec,
574 .procname = "tcp_mtu_probing",
575 .data = &sysctl_tcp_mtu_probing,
576 .maxlen = sizeof(int),
578 .proc_handler = proc_dointvec,
581 .procname = "tcp_base_mss",
582 .data = &sysctl_tcp_base_mss,
583 .maxlen = sizeof(int),
585 .proc_handler = proc_dointvec,
588 .procname = "tcp_workaround_signed_windows",
589 .data = &sysctl_tcp_workaround_signed_windows,
590 .maxlen = sizeof(int),
592 .proc_handler = proc_dointvec
594 #ifdef CONFIG_NET_DMA
596 .procname = "tcp_dma_copybreak",
597 .data = &sysctl_tcp_dma_copybreak,
598 .maxlen = sizeof(int),
600 .proc_handler = proc_dointvec
604 .procname = "tcp_slow_start_after_idle",
605 .data = &sysctl_tcp_slow_start_after_idle,
606 .maxlen = sizeof(int),
608 .proc_handler = proc_dointvec
610 #ifdef CONFIG_NETLABEL
612 .procname = "cipso_cache_enable",
613 .data = &cipso_v4_cache_enabled,
614 .maxlen = sizeof(int),
616 .proc_handler = proc_dointvec,
619 .procname = "cipso_cache_bucket_size",
620 .data = &cipso_v4_cache_bucketsize,
621 .maxlen = sizeof(int),
623 .proc_handler = proc_dointvec,
626 .procname = "cipso_rbm_optfmt",
627 .data = &cipso_v4_rbm_optfmt,
628 .maxlen = sizeof(int),
630 .proc_handler = proc_dointvec,
633 .procname = "cipso_rbm_strictvalid",
634 .data = &cipso_v4_rbm_strictvalid,
635 .maxlen = sizeof(int),
637 .proc_handler = proc_dointvec,
639 #endif /* CONFIG_NETLABEL */
/* The two list entries below are served from buffers built per call. */
641 .procname = "tcp_available_congestion_control",
642 .maxlen = TCP_CA_BUF_MAX,
644 .proc_handler = proc_tcp_available_congestion_control,
647 .procname = "tcp_allowed_congestion_control",
648 .maxlen = TCP_CA_BUF_MAX,
650 .proc_handler = proc_allowed_congestion_control,
653 .procname = "tcp_max_ssthresh",
654 .data = &sysctl_tcp_max_ssthresh,
655 .maxlen = sizeof(int),
657 .proc_handler = proc_dointvec,
660 .procname = "tcp_cookie_size",
661 .data = &sysctl_tcp_cookie_size,
662 .maxlen = sizeof(int),
664 .proc_handler = proc_dointvec
667 .procname = "tcp_thin_linear_timeouts",
668 .data = &sysctl_tcp_thin_linear_timeouts,
669 .maxlen = sizeof(int),
671 .proc_handler = proc_dointvec
674 .procname = "tcp_thin_dupack",
675 .data = &sysctl_tcp_thin_dupack,
676 .maxlen = sizeof(int),
678 .proc_handler = proc_dointvec
/* NOTE(review): minmax handler, but no limits are visible for this entry. */
681 .procname = "tcp_early_retrans",
682 .data = &sysctl_tcp_early_retrans,
683 .maxlen = sizeof(int),
685 .proc_handler = proc_dointvec_minmax,
690 .procname = "udp_mem",
691 .data = &sysctl_udp_mem,
692 .maxlen = sizeof(sysctl_udp_mem),
694 .proc_handler = proc_doulongvec_minmax,
697 .procname = "udp_rmem_min",
698 .data = &sysctl_udp_rmem_min,
699 .maxlen = sizeof(sysctl_udp_rmem_min),
701 .proc_handler = proc_dointvec_minmax,
705 .procname = "udp_wmem_min",
706 .data = &sysctl_udp_wmem_min,
707 .maxlen = sizeof(sysctl_udp_wmem_min),
709 .proc_handler = proc_dointvec_minmax,
/*
 * Sysctl entries registered per network namespace by
 * ipv4_sysctl_init_net(). As written, every .data pointer refers to
 * init_net; for any other namespace ipv4_sysctl_init_net() duplicates this
 * table with kmemdup() before registering it, and references the matching
 * fields of that namespace.
 */
715 static struct ctl_table ipv4_net_table[] = {
717 .procname = "icmp_echo_ignore_all",
718 .data = &init_net.ipv4.sysctl_icmp_echo_ignore_all,
719 .maxlen = sizeof(int),
721 .proc_handler = proc_dointvec
724 .procname = "icmp_echo_ignore_broadcasts",
725 .data = &init_net.ipv4.sysctl_icmp_echo_ignore_broadcasts,
726 .maxlen = sizeof(int),
728 .proc_handler = proc_dointvec
731 .procname = "icmp_ignore_bogus_error_responses",
732 .data = &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses,
733 .maxlen = sizeof(int),
735 .proc_handler = proc_dointvec
738 .procname = "icmp_errors_use_inbound_ifaddr",
739 .data = &init_net.ipv4.sysctl_icmp_errors_use_inbound_ifaddr,
740 .maxlen = sizeof(int),
742 .proc_handler = proc_dointvec
745 .procname = "icmp_ratelimit",
746 .data = &init_net.ipv4.sysctl_icmp_ratelimit,
747 .maxlen = sizeof(int),
749 .proc_handler = proc_dointvec_ms_jiffies,
752 .procname = "icmp_ratemask",
753 .data = &init_net.ipv4.sysctl_icmp_ratemask,
754 .maxlen = sizeof(int),
756 .proc_handler = proc_dointvec
759 .procname = "rt_cache_rebuild_count",
760 .data = &init_net.ipv4.sysctl_rt_cache_rebuild_count,
761 .maxlen = sizeof(int),
763 .proc_handler = proc_dointvec
/* Two-element gid vector; read and written via the custom handler. */
766 .procname = "ping_group_range",
767 .data = &init_net.ipv4.sysctl_ping_group_range,
768 .maxlen = sizeof(init_net.ipv4.sysctl_ping_group_range),
770 .proc_handler = ipv4_ping_group_range,
/* No .data shown: ipv4_tcp_mem() resolves the namespace from current. */
773 .procname = "tcp_mem",
774 .maxlen = sizeof(init_net.ipv4.sysctl_tcp_mem),
776 .proc_handler = ipv4_tcp_mem,
/*
 * Per-namespace init hook (wired up as ipv4_sysctl_ops.init).
 *
 * init_net registers ipv4_net_table directly. Any other namespace gets a
 * kmemdup() copy of the table; the addresses of that namespace's own
 * net->ipv4 fields are taken for it (presumably assigned to the copied
 * entries' .data). Defaults for ping_group_range and
 * rt_cache_rebuild_count are then set, and the table is registered under
 * "net/ipv4" with the header kept in net->ipv4.ipv4_hdr.
 */
781 static __net_init int ipv4_sysctl_init_net(struct net *net)
783 struct ctl_table *table;
785 table = ipv4_net_table;
/* Non-initial namespaces work on a private copy of the table. */
786 if (!net_eq(net, &init_net)) {
787 table = kmemdup(table, sizeof(ipv4_net_table), GFP_KERNEL);
792 &net->ipv4.sysctl_icmp_echo_ignore_all;
794 &net->ipv4.sysctl_icmp_echo_ignore_broadcasts;
796 &net->ipv4.sysctl_icmp_ignore_bogus_error_responses;
798 &net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr;
800 &net->ipv4.sysctl_icmp_ratelimit;
802 &net->ipv4.sysctl_icmp_ratemask;
804 &net->ipv4.sysctl_rt_cache_rebuild_count;
806 &net->ipv4.sysctl_ping_group_range;
811 * Sane defaults - nobody may create ping sockets.
812 * Boot scripts should set this to distro-specific group.
/* low = 1 and high = 0 form an empty range, so no gid matches. */
814 net->ipv4.sysctl_ping_group_range[0] = 1;
815 net->ipv4.sysctl_ping_group_range[1] = 0;
817 net->ipv4.sysctl_rt_cache_rebuild_count = 4;
821 net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table);
/* A NULL header means registration failed. */
822 if (net->ipv4.ipv4_hdr == NULL)
/* Error handling again distinguishes the copied table from the static one. */
828 if (!net_eq(net, &init_net))
/*
 * Per-namespace exit hook (wired up as ipv4_sysctl_ops.exit).
 * The table pointer is read from the registered header before the header
 * is unregistered.
 * NOTE(review): presumably so the table can be released afterwards - confirm.
 */
834 static __net_exit void ipv4_sysctl_exit_net(struct net *net)
836 struct ctl_table *table;
/* Fetch the table first: the header is handed back on the next line. */
838 table = net->ipv4.ipv4_hdr->ctl_table_arg;
839 unregister_net_sysctl_table(net->ipv4.ipv4_hdr);
/*
 * Per-namespace callbacks for the IPv4 sysctl tables; registered from
 * sysctl_ipv4_init() via register_pernet_subsys().
 */
843 static __net_initdata struct pernet_operations ipv4_sysctl_ops = {
844 .init = ipv4_sysctl_init_net,
845 .exit = ipv4_sysctl_exit_net,
/*
 * Boot-time setup of the IPv4 sysctl tree, run through __initcall().
 *
 * Steps visible here:
 *  1. walk ipv4_table until the entry with a NULL .procname and point the
 *     "ip_local_reserved_ports" entry's .data at
 *     sysctl_local_reserved_ports (the table initializer leaves it NULL);
 *  2. register ipv4_table for init_net under "net/ipv4";
 *  3. register the per-namespace operations; if that fails, the table
 *     registered in step 2 is unregistered again.
 */
848 static __init int sysctl_ipv4_init(void)
850 struct ctl_table_header *hdr;
/* Locate the reserved-ports entry by name and patch in its backing store. */
853 for (i = ipv4_table; i->procname; i++) {
854 if (strcmp(i->procname, "ip_local_reserved_ports") == 0) {
855 i->data = sysctl_local_reserved_ports;
862 hdr = register_net_sysctl(&init_net, "net/ipv4", ipv4_table);
/* register_pernet_subsys() returns non-zero on failure: undo step 2. */
866 if (register_pernet_subsys(&ipv4_sysctl_ops)) {
867 unregister_net_sysctl_table(hdr);
874 __initcall(sysctl_ipv4_init);