2 * sysctl_net_ipv4.c: sysctl interface to net IPV4 subsystem.
4 * Begun April 1, 1996, Mike Shaver.
5 * Added /proc/sys/net/ipv4 directory entry (empty =) ). [MS]
9 #include <linux/module.h>
10 #include <linux/sysctl.h>
11 #include <linux/igmp.h>
12 #include <linux/inetdevice.h>
13 #include <linux/seqlock.h>
14 #include <linux/init.h>
15 #include <linux/slab.h>
16 #include <linux/nsproxy.h>
17 #include <linux/swap.h>
21 #include <net/route.h>
24 #include <net/cipso_ipv4.h>
25 #include <net/inet_frag.h>
27 #include <net/tcp_memcontrol.h>
/*
 * Bounds handed to the range-checking sysctl handlers through the
 * .extra1 (minimum) and .extra2 (maximum) fields of the ctl_table entries
 * and temporary tables further down in this file.
 */
/* Upper bound for "tcp_retries1". */
31 static int tcp_retr1_max = 255;
/* Per-element bounds for the two-value "ip_local_port_range" vector. */
32 static int ip_local_port_range_min[] = { 1, 1 };
33 static int ip_local_port_range_max[] = { 65535, 65535 };
/* Bounds for "tcp_adv_win_scale". */
34 static int tcp_adv_win_scale_min = -31;
35 static int tcp_adv_win_scale_max = 31;
/* Bounds for "ip_default_ttl". */
36 static int ip_ttl_min = 1;
37 static int ip_ttl_max = 255;
/* Per-element bounds for the two-value "ping_group_range" vector. */
38 static int ip_ping_group_range_min[] = { 0, 0 };
39 static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
41 /* Update system visible IP port range */
/*
 * Copies range[0] and range[1] into sysctl_local_ports.range while holding
 * the write side of sysctl_local_ports.lock, so the two bounds are stored
 * inside a single seqlock write section.
 */
42 static void set_local_port_range(int range[2])
44 write_seqlock(&sysctl_local_ports.lock);
45 sysctl_local_ports.range[0] = range[0];
46 sysctl_local_ports.range[1] = range[1];
47 write_sequnlock(&sysctl_local_ports.lock);
50 /* Validate changes from /proc interface. */
/*
 * proc handler for "ip_local_port_range".  The values are read and parsed
 * through a temporary table backed by a local `range` pair, bounded by
 * ip_local_port_range_min/max, and only published via
 * set_local_port_range() after a successful write.
 */
51 static int ipv4_local_port_range(ctl_table *table, int write,
53 size_t *lenp, loff_t *ppos)
59 .maxlen = sizeof(range),
61 .extra1 = &ip_local_port_range_min,
62 .extra2 = &ip_local_port_range_max,
/* Seed the local pair with the current values before parsing/printing. */
65 inet_get_local_port_range(range, range + 1);
66 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
68 if (write && ret == 0) {
/*
 * An inverted range (high < low) is singled out here; the statement that
 * handles it is not visible in this extract -- presumably an error return.
 */
69 if (range[1] < range[0])
72 set_local_port_range(range);
/*
 * Reads the ping group range stored behind table->data into *low / *high.
 * The read is wrapped in a read_seqbegin()/read_seqretry() loop on
 * sysctl_local_ports.lock, i.e. the same seqlock that guards the local
 * port range.  The copy statements inside the loop are not visible in
 * this extract.
 */
79 static void inet_get_ping_group_range_table(struct ctl_table *table, gid_t *low, gid_t *high)
81 gid_t *data = table->data;
84 seq = read_seqbegin(&sysctl_local_ports.lock);
88 } while (read_seqretry(&sysctl_local_ports.lock, seq));
91 /* Update system visible ping group range */
/*
 * Publishes a new ping group range into the storage behind table->data.
 * The update runs under the write side of sysctl_local_ports.lock -- the
 * seqlock shared with the local port range.  The stores themselves are
 * not visible in this extract; presumably they copy range[0]/range[1]
 * into data[0]/data[1] between the lock and unlock calls.
 */
92 static void set_ping_group_range(struct ctl_table *table, gid_t range[2])
94 gid_t *data = table->data;
95 write_seqlock(&sysctl_local_ports.lock);
98 write_sequnlock(&sysctl_local_ports.lock);
101 /* Validate changes from /proc interface. */
/*
 * proc handler for "ping_group_range".  Mirrors ipv4_local_port_range():
 * a temporary table backed by a local `range` pair is parsed with
 * proc_dointvec_minmax() against ip_ping_group_range_min/max, and the
 * result is stored with set_ping_group_range() after a successful write.
 *
 * NOTE(review): `range` is handed to helpers that take gid_t pointers but
 * is parsed by the int-vector handler; its declaration is not visible
 * here -- confirm the element type matches what proc_dointvec_minmax()
 * expects.
 */
102 static int ipv4_ping_group_range(ctl_table *table, int write,
104 size_t *lenp, loff_t *ppos)
110 .maxlen = sizeof(range),
112 .extra1 = &ip_ping_group_range_min,
113 .extra2 = &ip_ping_group_range_max,
/* Seed the local pair from the table's current values. */
116 inet_get_ping_group_range_table(table, range, range + 1);
117 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
119 if (write && ret == 0)
120 set_ping_group_range(table, range);
/*
 * proc handler for "tcp_congestion_control".  The current default
 * algorithm name is fetched into a stack buffer of TCP_CA_NAME_MAX bytes,
 * exposed through a temporary table via proc_dostring(), and on a
 * successful write the new name is passed to
 * tcp_set_default_congestion_control(), whose result becomes the return
 * value.
 */
125 static int proc_tcp_congestion_control(ctl_table *ctl, int write,
126 void __user *buffer, size_t *lenp, loff_t *ppos)
128 char val[TCP_CA_NAME_MAX];
131 .maxlen = TCP_CA_NAME_MAX,
135 tcp_get_default_congestion_control(val);
137 ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
138 if (write && ret == 0)
139 ret = tcp_set_default_congestion_control(val);
/*
 * proc handler for "tcp_available_congestion_control".  A scratch buffer
 * of TCP_CA_BUF_MAX bytes is allocated, filled by
 * tcp_get_available_congestion_control() and exposed through a temporary
 * table via proc_dostring().
 *
 * The allocation-failure check and the release of the buffer are not
 * visible in this extract -- verify both against the full file.
 */
143 static int proc_tcp_available_congestion_control(ctl_table *ctl,
145 void __user *buffer, size_t *lenp,
148 ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX, };
151 tbl.data = kmalloc(tbl.maxlen, GFP_USER);
154 tcp_get_available_congestion_control(tbl.data, TCP_CA_BUF_MAX);
155 ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
/*
 * proc handler for "tcp_allowed_congestion_control".  A scratch buffer of
 * TCP_CA_BUF_MAX bytes is allocated and filled with the current allowed
 * list, exposed via proc_dostring(), and on a successful write the
 * (possibly modified) string is handed to
 * tcp_set_allowed_congestion_control(), whose result becomes the return
 * value.
 *
 * The allocation-failure check and the release of the buffer are not
 * visible in this extract -- verify both against the full file.
 */
160 static int proc_allowed_congestion_control(ctl_table *ctl,
162 void __user *buffer, size_t *lenp,
165 ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX };
168 tbl.data = kmalloc(tbl.maxlen, GFP_USER);
172 tcp_get_allowed_congestion_control(tbl.data, tbl.maxlen);
173 ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
174 if (write && ret == 0)
175 ret = tcp_set_allowed_congestion_control(tbl.data);
/*
 * proc handler for "tcp_mem".  The three unsigned long values live in the
 * network namespace of the calling task (current->nsproxy->net_ns).
 * Two calls to proc_doulongvec_minmax() are visible: one that operates
 * directly on the namespace's array and returns, and one that parses into
 * the local `vec` through a temporary table, after which the values are
 * copied into net->ipv4.sysctl_tcp_mem (and, with
 * CONFIG_CGROUP_MEM_RES_CTLR_KMEM, pushed to the task's memory cgroup).
 * The conditions selecting between those paths are not visible in this
 * extract.
 */
180 static int ipv4_tcp_mem(ctl_table *ctl, int write,
181 void __user *buffer, size_t *lenp,
185 unsigned long vec[3];
186 struct net *net = current->nsproxy->net_ns;
187 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
188 struct mem_cgroup *memcg;
193 .maxlen = sizeof(vec),
/*
 * Presumably the read path: point the entry at this namespace's array and
 * let the generic handler print it.
 * NOTE(review): this stores into the ctl_table passed in by the caller,
 * not into a private copy -- confirm that is safe when the table entry is
 * shared.
 */
198 ctl->data = &net->ipv4.sysctl_tcp_mem;
199 return proc_doulongvec_minmax(ctl, write, buffer, lenp, ppos);
/* Parse into the local vector via the temporary table. */
202 ret = proc_doulongvec_minmax(&tmp, write, buffer, lenp, ppos);
206 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
/* Propagate each of the three values to the calling task's memory cgroup. */
208 memcg = mem_cgroup_from_task(current);
210 tcp_prot_mem(memcg, vec[0], 0);
211 tcp_prot_mem(memcg, vec[1], 1);
212 tcp_prot_mem(memcg, vec[2], 2);
/* Store the parsed values in the per-namespace array. */
216 net->ipv4.sysctl_tcp_mem[0] = vec[0];
217 net->ipv4.sysctl_tcp_mem[1] = vec[1];
218 net->ipv4.sysctl_tcp_mem[2] = vec[2];
/*
 * sysctl entries registered under "net/ipv4" for init_net by
 * sysctl_ipv4_init().  Each entry names the /proc file (.procname), the
 * variable it exposes (.data), its size (.maxlen) and the handler used to
 * parse and print it (.proc_handler); range-checked entries additionally
 * carry .extra1/.extra2 bounds.
 */
223 static struct ctl_table ipv4_table[] = {
225 .procname = "tcp_timestamps",
226 .data = &sysctl_tcp_timestamps,
227 .maxlen = sizeof(int),
229 .proc_handler = proc_dointvec
232 .procname = "tcp_window_scaling",
233 .data = &sysctl_tcp_window_scaling,
234 .maxlen = sizeof(int),
236 .proc_handler = proc_dointvec
239 .procname = "tcp_sack",
240 .data = &sysctl_tcp_sack,
241 .maxlen = sizeof(int),
243 .proc_handler = proc_dointvec
246 .procname = "tcp_retrans_collapse",
247 .data = &sysctl_tcp_retrans_collapse,
248 .maxlen = sizeof(int),
250 .proc_handler = proc_dointvec
/* Range-checked against ip_ttl_min..ip_ttl_max. */
253 .procname = "ip_default_ttl",
254 .data = &sysctl_ip_default_ttl,
255 .maxlen = sizeof(int),
257 .proc_handler = proc_dointvec_minmax,
258 .extra1 = &ip_ttl_min,
259 .extra2 = &ip_ttl_max,
262 .procname = "ip_no_pmtu_disc",
263 .data = &ipv4_config.no_pmtu_disc,
264 .maxlen = sizeof(int),
266 .proc_handler = proc_dointvec
269 .procname = "ip_nonlocal_bind",
270 .data = &sysctl_ip_nonlocal_bind,
271 .maxlen = sizeof(int),
273 .proc_handler = proc_dointvec
276 .procname = "tcp_syn_retries",
277 .data = &sysctl_tcp_syn_retries,
278 .maxlen = sizeof(int),
280 .proc_handler = proc_dointvec
283 .procname = "tcp_synack_retries",
284 .data = &sysctl_tcp_synack_retries,
285 .maxlen = sizeof(int),
287 .proc_handler = proc_dointvec
290 .procname = "tcp_max_orphans",
291 .data = &sysctl_tcp_max_orphans,
292 .maxlen = sizeof(int),
294 .proc_handler = proc_dointvec
297 .procname = "tcp_max_tw_buckets",
298 .data = &tcp_death_row.sysctl_max_tw_buckets,
299 .maxlen = sizeof(int),
301 .proc_handler = proc_dointvec
304 .procname = "ip_early_demux",
305 .data = &sysctl_ip_early_demux,
306 .maxlen = sizeof(int),
308 .proc_handler = proc_dointvec
311 .procname = "ip_dynaddr",
312 .data = &sysctl_ip_dynaddr,
313 .maxlen = sizeof(int),
315 .proc_handler = proc_dointvec
318 .procname = "tcp_keepalive_time",
319 .data = &sysctl_tcp_keepalive_time,
320 .maxlen = sizeof(int),
322 .proc_handler = proc_dointvec_jiffies,
325 .procname = "tcp_keepalive_probes",
326 .data = &sysctl_tcp_keepalive_probes,
327 .maxlen = sizeof(int),
329 .proc_handler = proc_dointvec
332 .procname = "tcp_keepalive_intvl",
333 .data = &sysctl_tcp_keepalive_intvl,
334 .maxlen = sizeof(int),
336 .proc_handler = proc_dointvec_jiffies,
/*
 * Only an upper bound (.extra2) is visible for this entry; no .extra1
 * appears in this extract.
 */
339 .procname = "tcp_retries1",
340 .data = &sysctl_tcp_retries1,
341 .maxlen = sizeof(int),
343 .proc_handler = proc_dointvec_minmax,
344 .extra2 = &tcp_retr1_max
347 .procname = "tcp_retries2",
348 .data = &sysctl_tcp_retries2,
349 .maxlen = sizeof(int),
351 .proc_handler = proc_dointvec
354 .procname = "tcp_fin_timeout",
355 .data = &sysctl_tcp_fin_timeout,
356 .maxlen = sizeof(int),
358 .proc_handler = proc_dointvec_jiffies,
360 #ifdef CONFIG_SYN_COOKIES
362 .procname = "tcp_syncookies",
363 .data = &sysctl_tcp_syncookies,
364 .maxlen = sizeof(int),
366 .proc_handler = proc_dointvec
370 .procname = "tcp_fastopen",
371 .data = &sysctl_tcp_fastopen,
372 .maxlen = sizeof(int),
374 .proc_handler = proc_dointvec,
377 .procname = "tcp_tw_recycle",
378 .data = &tcp_death_row.sysctl_tw_recycle,
379 .maxlen = sizeof(int),
381 .proc_handler = proc_dointvec
384 .procname = "tcp_abort_on_overflow",
385 .data = &sysctl_tcp_abort_on_overflow,
386 .maxlen = sizeof(int),
388 .proc_handler = proc_dointvec
391 .procname = "tcp_stdurg",
392 .data = &sysctl_tcp_stdurg,
393 .maxlen = sizeof(int),
395 .proc_handler = proc_dointvec
398 .procname = "tcp_rfc1337",
399 .data = &sysctl_tcp_rfc1337,
400 .maxlen = sizeof(int),
402 .proc_handler = proc_dointvec
405 .procname = "tcp_max_syn_backlog",
406 .data = &sysctl_max_syn_backlog,
407 .maxlen = sizeof(int),
409 .proc_handler = proc_dointvec
/*
 * Custom handler: ipv4_local_port_range() parses into a temporary pair
 * and publishes it through set_local_port_range().
 */
412 .procname = "ip_local_port_range",
413 .data = &sysctl_local_ports.range,
414 .maxlen = sizeof(sysctl_local_ports.range),
416 .proc_handler = ipv4_local_port_range,
/* .data is filled in with sysctl_local_reserved_ports by sysctl_ipv4_init(). */
419 .procname = "ip_local_reserved_ports",
420 .data = NULL, /* initialized in sysctl_ipv4_init */
423 .proc_handler = proc_do_large_bitmap,
426 .procname = "igmp_max_memberships",
427 .data = &sysctl_igmp_max_memberships,
428 .maxlen = sizeof(int),
430 .proc_handler = proc_dointvec
433 .procname = "igmp_max_msf",
434 .data = &sysctl_igmp_max_msf,
435 .maxlen = sizeof(int),
437 .proc_handler = proc_dointvec
440 .procname = "inet_peer_threshold",
441 .data = &inet_peer_threshold,
442 .maxlen = sizeof(int),
444 .proc_handler = proc_dointvec
447 .procname = "inet_peer_minttl",
448 .data = &inet_peer_minttl,
449 .maxlen = sizeof(int),
451 .proc_handler = proc_dointvec_jiffies,
454 .procname = "inet_peer_maxttl",
455 .data = &inet_peer_maxttl,
456 .maxlen = sizeof(int),
458 .proc_handler = proc_dointvec_jiffies,
461 .procname = "tcp_orphan_retries",
462 .data = &sysctl_tcp_orphan_retries,
463 .maxlen = sizeof(int),
465 .proc_handler = proc_dointvec
468 .procname = "tcp_fack",
469 .data = &sysctl_tcp_fack,
470 .maxlen = sizeof(int),
472 .proc_handler = proc_dointvec
475 .procname = "tcp_reordering",
476 .data = &sysctl_tcp_reordering,
477 .maxlen = sizeof(int),
479 .proc_handler = proc_dointvec
482 .procname = "tcp_ecn",
483 .data = &sysctl_tcp_ecn,
484 .maxlen = sizeof(int),
486 .proc_handler = proc_dointvec
489 .procname = "tcp_dsack",
490 .data = &sysctl_tcp_dsack,
491 .maxlen = sizeof(int),
493 .proc_handler = proc_dointvec
/* .maxlen covers the whole sysctl_tcp_wmem / sysctl_tcp_rmem object. */
496 .procname = "tcp_wmem",
497 .data = &sysctl_tcp_wmem,
498 .maxlen = sizeof(sysctl_tcp_wmem),
500 .proc_handler = proc_dointvec
503 .procname = "tcp_rmem",
504 .data = &sysctl_tcp_rmem,
505 .maxlen = sizeof(sysctl_tcp_rmem),
507 .proc_handler = proc_dointvec
510 .procname = "tcp_app_win",
511 .data = &sysctl_tcp_app_win,
512 .maxlen = sizeof(int),
514 .proc_handler = proc_dointvec
/* Range-checked against tcp_adv_win_scale_min..tcp_adv_win_scale_max. */
517 .procname = "tcp_adv_win_scale",
518 .data = &sysctl_tcp_adv_win_scale,
519 .maxlen = sizeof(int),
521 .proc_handler = proc_dointvec_minmax,
522 .extra1 = &tcp_adv_win_scale_min,
523 .extra2 = &tcp_adv_win_scale_max,
526 .procname = "tcp_tw_reuse",
527 .data = &sysctl_tcp_tw_reuse,
528 .maxlen = sizeof(int),
530 .proc_handler = proc_dointvec
533 .procname = "tcp_frto",
534 .data = &sysctl_tcp_frto,
535 .maxlen = sizeof(int),
537 .proc_handler = proc_dointvec
540 .procname = "tcp_frto_response",
541 .data = &sysctl_tcp_frto_response,
542 .maxlen = sizeof(int),
544 .proc_handler = proc_dointvec
547 .procname = "tcp_low_latency",
548 .data = &sysctl_tcp_low_latency,
549 .maxlen = sizeof(int),
551 .proc_handler = proc_dointvec
554 .procname = "tcp_no_metrics_save",
555 .data = &sysctl_tcp_nometrics_save,
556 .maxlen = sizeof(int),
558 .proc_handler = proc_dointvec,
561 .procname = "tcp_moderate_rcvbuf",
562 .data = &sysctl_tcp_moderate_rcvbuf,
563 .maxlen = sizeof(int),
565 .proc_handler = proc_dointvec,
568 .procname = "tcp_tso_win_divisor",
569 .data = &sysctl_tcp_tso_win_divisor,
570 .maxlen = sizeof(int),
572 .proc_handler = proc_dointvec,
/* No .data is visible here; the handler works on its own stack buffer. */
575 .procname = "tcp_congestion_control",
577 .maxlen = TCP_CA_NAME_MAX,
578 .proc_handler = proc_tcp_congestion_control,
581 .procname = "tcp_abc",
582 .data = &sysctl_tcp_abc,
583 .maxlen = sizeof(int),
585 .proc_handler = proc_dointvec,
588 .procname = "tcp_mtu_probing",
589 .data = &sysctl_tcp_mtu_probing,
590 .maxlen = sizeof(int),
592 .proc_handler = proc_dointvec,
595 .procname = "tcp_base_mss",
596 .data = &sysctl_tcp_base_mss,
597 .maxlen = sizeof(int),
599 .proc_handler = proc_dointvec,
602 .procname = "tcp_workaround_signed_windows",
603 .data = &sysctl_tcp_workaround_signed_windows,
604 .maxlen = sizeof(int),
606 .proc_handler = proc_dointvec
609 .procname = "tcp_limit_output_bytes",
610 .data = &sysctl_tcp_limit_output_bytes,
611 .maxlen = sizeof(int),
613 .proc_handler = proc_dointvec
616 .procname = "tcp_challenge_ack_limit",
617 .data = &sysctl_tcp_challenge_ack_limit,
618 .maxlen = sizeof(int),
620 .proc_handler = proc_dointvec
622 #ifdef CONFIG_NET_DMA
624 .procname = "tcp_dma_copybreak",
625 .data = &sysctl_tcp_dma_copybreak,
626 .maxlen = sizeof(int),
628 .proc_handler = proc_dointvec
632 .procname = "tcp_slow_start_after_idle",
633 .data = &sysctl_tcp_slow_start_after_idle,
634 .maxlen = sizeof(int),
636 .proc_handler = proc_dointvec
/* CIPSO entries, present only when CONFIG_NETLABEL is defined. */
638 #ifdef CONFIG_NETLABEL
640 .procname = "cipso_cache_enable",
641 .data = &cipso_v4_cache_enabled,
642 .maxlen = sizeof(int),
644 .proc_handler = proc_dointvec,
647 .procname = "cipso_cache_bucket_size",
648 .data = &cipso_v4_cache_bucketsize,
649 .maxlen = sizeof(int),
651 .proc_handler = proc_dointvec,
654 .procname = "cipso_rbm_optfmt",
655 .data = &cipso_v4_rbm_optfmt,
656 .maxlen = sizeof(int),
658 .proc_handler = proc_dointvec,
661 .procname = "cipso_rbm_strictvalid",
662 .data = &cipso_v4_rbm_strictvalid,
663 .maxlen = sizeof(int),
665 .proc_handler = proc_dointvec,
667 #endif /* CONFIG_NETLABEL */
/* The two list handlers below allocate their own TCP_CA_BUF_MAX buffer. */
669 .procname = "tcp_available_congestion_control",
670 .maxlen = TCP_CA_BUF_MAX,
672 .proc_handler = proc_tcp_available_congestion_control,
675 .procname = "tcp_allowed_congestion_control",
676 .maxlen = TCP_CA_BUF_MAX,
678 .proc_handler = proc_allowed_congestion_control,
681 .procname = "tcp_max_ssthresh",
682 .data = &sysctl_tcp_max_ssthresh,
683 .maxlen = sizeof(int),
685 .proc_handler = proc_dointvec,
688 .procname = "tcp_cookie_size",
689 .data = &sysctl_tcp_cookie_size,
690 .maxlen = sizeof(int),
692 .proc_handler = proc_dointvec
695 .procname = "tcp_thin_linear_timeouts",
696 .data = &sysctl_tcp_thin_linear_timeouts,
697 .maxlen = sizeof(int),
699 .proc_handler = proc_dointvec
702 .procname = "tcp_thin_dupack",
703 .data = &sysctl_tcp_thin_dupack,
704 .maxlen = sizeof(int),
706 .proc_handler = proc_dointvec
/*
 * Uses the min/max handler; the bounds for this and the following
 * *_minmax entries are not visible in this extract.
 */
709 .procname = "tcp_early_retrans",
710 .data = &sysctl_tcp_early_retrans,
711 .maxlen = sizeof(int),
713 .proc_handler = proc_dointvec_minmax,
718 .procname = "udp_mem",
719 .data = &sysctl_udp_mem,
720 .maxlen = sizeof(sysctl_udp_mem),
722 .proc_handler = proc_doulongvec_minmax,
725 .procname = "udp_rmem_min",
726 .data = &sysctl_udp_rmem_min,
727 .maxlen = sizeof(sysctl_udp_rmem_min),
729 .proc_handler = proc_dointvec_minmax,
733 .procname = "udp_wmem_min",
734 .data = &sysctl_udp_wmem_min,
735 .maxlen = sizeof(sysctl_udp_wmem_min),
737 .proc_handler = proc_dointvec_minmax,
/*
 * Per-network-namespace sysctl entries.  The .data pointers here refer to
 * init_net; ipv4_sysctl_init_net() duplicates this table for every other
 * namespace and takes the addresses of that namespace's own fields in the
 * same order as the entries below.
 */
743 static struct ctl_table ipv4_net_table[] = {
745 .procname = "icmp_echo_ignore_all",
746 .data = &init_net.ipv4.sysctl_icmp_echo_ignore_all,
747 .maxlen = sizeof(int),
749 .proc_handler = proc_dointvec
752 .procname = "icmp_echo_ignore_broadcasts",
753 .data = &init_net.ipv4.sysctl_icmp_echo_ignore_broadcasts,
754 .maxlen = sizeof(int),
756 .proc_handler = proc_dointvec
759 .procname = "icmp_ignore_bogus_error_responses",
760 .data = &init_net.ipv4.sysctl_icmp_ignore_bogus_error_responses,
761 .maxlen = sizeof(int),
763 .proc_handler = proc_dointvec
766 .procname = "icmp_errors_use_inbound_ifaddr",
767 .data = &init_net.ipv4.sysctl_icmp_errors_use_inbound_ifaddr,
768 .maxlen = sizeof(int),
770 .proc_handler = proc_dointvec
773 .procname = "icmp_ratelimit",
774 .data = &init_net.ipv4.sysctl_icmp_ratelimit,
775 .maxlen = sizeof(int),
777 .proc_handler = proc_dointvec_ms_jiffies,
780 .procname = "icmp_ratemask",
781 .data = &init_net.ipv4.sysctl_icmp_ratemask,
782 .maxlen = sizeof(int),
784 .proc_handler = proc_dointvec
787 .procname = "rt_cache_rebuild_count",
788 .data = &init_net.ipv4.sysctl_rt_cache_rebuild_count,
789 .maxlen = sizeof(int),
791 .proc_handler = proc_dointvec
/* Custom handler: reads/writes go through the seqlock-protected helpers. */
794 .procname = "ping_group_range",
795 .data = &init_net.ipv4.sysctl_ping_group_range,
796 .maxlen = sizeof(init_net.ipv4.sysctl_ping_group_range),
798 .proc_handler = ipv4_ping_group_range,
/*
 * No .data is visible for this entry; ipv4_tcp_mem() resolves the storage
 * from the calling task's network namespace itself.
 */
801 .procname = "tcp_mem",
802 .maxlen = sizeof(init_net.ipv4.sysctl_tcp_mem),
804 .proc_handler = ipv4_tcp_mem,
/*
 * Per-namespace init hook (wired up as .init of ipv4_sysctl_ops).
 * init_net uses ipv4_net_table directly; any other namespace gets a
 * kmemdup() copy whose entries are retargeted at that namespace's own
 * fields.  Defaults for the ping group range and the route cache rebuild
 * count are then set, and the table is registered under "net/ipv4" with
 * the resulting header stored in net->ipv4.ipv4_hdr.
 *
 * The error-handling statements (allocation failure, registration
 * failure, freeing the copy) and the return statements are not visible in
 * this extract.
 */
809 static __net_init int ipv4_sysctl_init_net(struct net *net)
811 struct ctl_table *table;
813 table = ipv4_net_table;
814 if (!net_eq(net, &init_net)) {
815 table = kmemdup(table, sizeof(ipv4_net_table), GFP_KERNEL);
/*
 * Right-hand sides of the per-entry .data retargeting; the assignment
 * targets (presumably table[i].data, in ipv4_net_table order) are not
 * visible in this extract.
 */
820 &net->ipv4.sysctl_icmp_echo_ignore_all;
822 &net->ipv4.sysctl_icmp_echo_ignore_broadcasts;
824 &net->ipv4.sysctl_icmp_ignore_bogus_error_responses;
826 &net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr;
828 &net->ipv4.sysctl_icmp_ratelimit;
830 &net->ipv4.sysctl_icmp_ratemask;
832 &net->ipv4.sysctl_rt_cache_rebuild_count;
834 &net->ipv4.sysctl_ping_group_range;
839 * Sane defaults - nobody may create ping sockets.
840 * Boot scripts should set this to distro-specific group.
/* low (1) > high (0): an empty group range. */
842 net->ipv4.sysctl_ping_group_range[0] = 1;
843 net->ipv4.sysctl_ping_group_range[1] = 0;
845 net->ipv4.sysctl_rt_cache_rebuild_count = 4;
849 net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table);
850 if (net->ipv4.ipv4_hdr == NULL)
/* Only a non-init_net namespace owns a duplicated table. */
856 if (!net_eq(net, &init_net))
/*
 * Per-namespace exit hook (wired up as .exit of ipv4_sysctl_ops).
 * The table pointer is read out of the registered header before the
 * header is unregistered.  What happens to `table` afterwards is not
 * visible in this extract -- presumably the duplicated table is freed.
 */
862 static __net_exit void ipv4_sysctl_exit_net(struct net *net)
864 struct ctl_table *table;
866 table = net->ipv4.ipv4_hdr->ctl_table_arg;
867 unregister_net_sysctl_table(net->ipv4.ipv4_hdr);
/*
 * Per-network-namespace hooks for the IPv4 sysctl tables; registered via
 * register_pernet_subsys() in sysctl_ipv4_init().
 */
871 static __net_initdata struct pernet_operations ipv4_sysctl_ops = {
872 .init = ipv4_sysctl_init_net,
873 .exit = ipv4_sysctl_exit_net,
/*
 * Boot-time initialisation, run through __initcall().
 *  1. Walks ipv4_table until the entry with a NULL .procname and points
 *     the "ip_local_reserved_ports" entry at sysctl_local_reserved_ports.
 *  2. Registers ipv4_table under "net/ipv4" for init_net.
 *  3. Registers the per-namespace operations; if that fails, the table
 *     registered in step 2 is unregistered again.
 *
 * The failure checks between the steps and the return statements are not
 * visible in this extract.
 */
876 static __init int sysctl_ipv4_init(void)
878 struct ctl_table_header *hdr;
881 for (i = ipv4_table; i->procname; i++) {
882 if (strcmp(i->procname, "ip_local_reserved_ports") == 0) {
883 i->data = sysctl_local_reserved_ports;
890 hdr = register_net_sysctl(&init_net, "net/ipv4", ipv4_table);
894 if (register_pernet_subsys(&ipv4_sysctl_ops)) {
895 unregister_net_sysctl_table(hdr);
902 __initcall(sysctl_ipv4_init);