Skip to content

Commit

Permalink
feat: reduce downtime by increasing arp cache timeout
Browse files Browse the repository at this point in the history
When upgrading ovs-ovn, if the ARP cache entries time out, new ARP requests cannot be processed, which leads to network-unreachable errors. Increase base_reachable_time_ms and gc_stale_time to 3 minutes to prevent ARP cache timeouts during the upgrade.

(cherry picked from commit 0adecb0)
  • Loading branch information
oilbeater committed Aug 30, 2022
1 parent 2b05fd4 commit 1510905
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 3 deletions.
12 changes: 12 additions & 0 deletions dist/images/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1766,6 +1766,9 @@ spec:
- name: OVN_DB_IPS
value: $addresses
volumeMounts:
- mountPath: /var/run/netns
name: host-ns
mountPropagation: HostToContainer
- mountPath: /lib/modules
name: host-modules
readOnly: true
Expand Down Expand Up @@ -1834,6 +1837,9 @@ spec:
- name: host-sys
hostPath:
path: /sys
- name: host-ns
hostPath:
path: /var/run/netns
- name: cni-conf
hostPath:
path: /etc/cni/net.d
Expand Down Expand Up @@ -2251,6 +2257,9 @@ spec:
- name: OVN_DB_IPS
value: $addresses
volumeMounts:
- mountPath: /var/run/netns
name: host-ns
mountPropagation: HostToContainer
- mountPath: /lib/modules
name: host-modules
readOnly: true
Expand Down Expand Up @@ -2314,6 +2323,9 @@ spec:
- name: host-sys
hostPath:
path: /sys
- name: host-ns
hostPath:
path: /var/run/netns
- name: cni-conf
hostPath:
path: /etc/cni/net.d
Expand Down
22 changes: 19 additions & 3 deletions dist/images/start-ovs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,18 @@ cat /proc/cmdline"
fi

# Exit handler: stop ovn-controller and Open vSwitch with minimal disruption.
#
# Before anything is stopped, the neighbour (ARP) timers on eth0 are raised
# to 3 minutes in every network namespace found under /var/run/netns, so
# cached entries do not expire while ARP requests cannot be answered.
# Afterwards ovn-controller and Open vSwitch are stopped.
#
# Globals:   none
# Arguments: none
# Returns:   does not return; always exits the shell with status 0
function quit {
  local netns

  # Everything below is best effort: a failing sysctl or stop script must not
  # abort the rest of the shutdown sequence.
  set +e
  for netns in /var/run/netns/*; do
    # An unmatched glob stays literal; skip it instead of handing a
    # non-existent path to nsenter.
    [[ -e "$netns" ]] || continue
    nsenter --net="$netns" sysctl -w net.ipv4.neigh.eth0.base_reachable_time_ms=180000
    nsenter --net="$netns" sysctl -w net.ipv4.neigh.eth0.gc_stale_time=180
  done
  # If an ARP entry is in the stale or delay state, stopping vswitchd makes
  # its probe fail. Wait a while so pending probes can complete; because the
  # timeouts were just increased, existing entries will not move to stale or
  # delay in the meantime.
  sleep 5
  /usr/share/ovn/scripts/grace_stop_ovn_controller
  /usr/share/openvswitch/scripts/ovs-ctl stop
  exit 0
}
# Register quit as the EXIT handler so its shutdown sequence runs whenever
# this script exits.
trap quit EXIT

Expand Down Expand Up @@ -253,5 +262,12 @@ set -e

# Set other_config:flow-restore-wait to "false" in the Open_vSwitch table.
# NOTE(review): presumably this ends a flow-restore phase started earlier in
# the script (not visible here) -- confirm.
ovs-vsctl --no-wait set open_vswitch . other_config:flow-restore-wait="false"

# Set the eth0 neighbour (ARP) timers back to 30 s reachable time and 60 s
# gc_stale_time in every network namespace found in a directory. These are
# the short values used during normal operation; quit raises them to
# 3 minutes right before OVS is stopped.
#
# Arguments: $1 - directory holding the namespace handles
#                 (optional, defaults to /var/run/netns)
# Returns:   status of the last command run; 0 when nothing matched
restore_neigh_timeouts() {
  local netns_dir="${1:-/var/run/netns}"
  local netns

  for netns in "$netns_dir"/*; do
    # An unmatched glob stays literal; skip it instead of handing a
    # non-existent path to nsenter.
    [[ -e "$netns" ]] || continue
    nsenter --net="$netns" sysctl -w net.ipv4.neigh.eth0.base_reachable_time_ms=30000
    nsenter --net="$netns" sysctl -w net.ipv4.neigh.eth0.gc_stale_time=60
  done
}

# Best effort: a failing sysctl in one namespace must not abort start-up.
set +e
restore_neigh_timeouts
set -e

# Restrict everything directly under /etc/openvswitch to owner read/write.
chmod 600 /etc/openvswitch/*
# Stream the ovn-controller log, following the file by name and retrying if
# it is not accessible.
# NOTE(review): presumably this also serves as the long-running foreground
# command of the script -- confirm.
tail --follow=name --retry /var/log/ovn/ovn-controller.log

0 comments on commit 1510905

Please sign in to comment.