Page MenuHomeVyOS Platform

wan load balance issues with 3 or more WANs
Open, LowPublicBUG

Description

Hello!
I'm getting these issues with WAN load balancing on all 1.4 versions:
1) VyOS replies to pings on incorrect interfaces
2) On one of the WANs, VyOS also replaces the source IP of DNAT'ed packets
Also
My configuration:
Interfaces:
ethernet eth0 {

vif 2220 {
    address 172.22.87.2/24
}
vif 2498 {
    address 172.22.1.2/24
}

}
ethernet eth1 {

vif 2494 {
    address 172.22.86.2/24
}

}

show high-availability:

group wan1 {
    address wan1/29 {
    }
    hello-source-address 172.22.87.2
    interface eth0.2220
    peer-address 172.22.87.1
    priority 50
    vrid 87
}
group wan2 {
    address wan2/24 {
    }
    hello-source-address 172.22.1.2
    interface eth0.2498
    peer-address 172.22.1.1
    priority 50
    vrid 98
}
group wan3 {
    address wan3/25 {
    }
    hello-source-address 172.22.86.2
    interface eth1.2494
    peer-address 172.22.86.1
    priority 50
    vrid 86
}

route 0.0.0.0/0 {

next-hop wan2gwip {
    interface eth0.2498
}
next-hop wan3gwip {
    interface eth1.2494
}
next-hop wan1gwip {
    interface eth0.2220
}

}

show load-balancing wan:
enable-local-traffic
flush-connections
interface-health eth0.2220 {

failure-count 1
nexthop wan1gwip
success-count 1

}
interface-health eth0.2498 {

failure-count 1
nexthop wan2gwip
success-count 1

}
interface-health eth1.2494 {

failure-count 1
nexthop wan3gwip
success-count 1

}
sticky-connections {

inbound

}

show nat:
destination {

rule 10 {
    destination {
        port 80,443
    }
    inbound-interface eth0.2498
    protocol tcp
    translation {
        address 10.10.43.22
    }
}
rule 20 {
    destination {
        port 80,443
    }
    inbound-interface eth1.2494
    protocol tcp
    translation {
        address 10.10.43.22
    }
}

}

nft table ip mangle:
chain VYOS_PBR_PREROUTING {

		type filter hook prerouting priority mangle; policy accept;

}

chain VYOS_PBR_POSTROUTING {

		type filter hook postrouting priority mangle; policy accept;

}

chain PREROUTING {

		type filter hook prerouting priority mangle; policy accept;
		iifname "eth1.2494" ct state new counter packets 3622 bytes 179471 jump ISP_eth1.2494_IN
		iifname "eth0.2498" ct state new counter packets 2456 bytes 131350 jump ISP_eth0.2498_IN
		iifname "eth0.2220" ct state new counter packets 2709 bytes 182891 jump ISP_eth0.2220_IN
		counter packets 126513 bytes 9558437 jump WANLOADBALANCE_PRE

}

chain OUTPUT {

		type route hook output priority mangle; policy accept;
		counter packets 177056 bytes 12429353 jump WANLOADBALANCE_OUT

}

chain WANLOADBALANCE_PRE {
}

chain WANLOADBALANCE_OUT {

		mark != 0x0 counter packets 0 bytes 0 accept
		meta l4proto icmp ip protocol icmp counter packets 10814 bytes 721146 accept
		ip saddr 127.0.0.0/8 ip daddr 127.0.0.0/8 counter packets 308 bytes 15400 accept

}

chain ISP_eth0.2220 {

		counter packets 0 bytes 0 ct mark set 0xc9
		counter packets 0 bytes 0 meta mark set 0xc9
		counter packets 0 bytes 0 accept

}

chain ISP_eth0.2220_IN {

		counter packets 2709 bytes 182891 ct mark set 0xc9

}

chain ISP_eth0.2498 {

		counter packets 0 bytes 0 ct mark set 0xca
		counter packets 0 bytes 0 meta mark set 0xca
		counter packets 0 bytes 0 accept

}

chain ISP_eth0.2498_IN {

		counter packets 2456 bytes 131350 ct mark set 0xca

}

chain ISP_eth1.2494 {

		counter packets 0 bytes 0 ct mark set 0xcb
		counter packets 0 bytes 0 meta mark set 0xcb
		counter packets 0 bytes 0 accept

}

chain ISP_eth1.2494_IN {

		counter packets 3622 bytes 179471 ct mark set 0xcb

}

Details

Difficulty level
Unknown (require assessment)
Version
1.4 all rolling releases
Why the issue appeared?
Will be filled on close
Is it a breaking change?
Unspecified (possibly destroys the router)
Issue type
Unspecified (please specify)

Event Timeline

I also tried assigning IP addresses directly to the WAN interfaces, to test whether the problem is somehow related to using VRRP together with WLB — it still does not work.

Nova_Logic renamed this task from "wan load balance issues with 3 WANs" to "wan load balance issues with 3 or more WANs". Aug 4 2022, 6:55 PM

I had a closer look at this. The commits here and here should have brought this back from the dead, but there is a small issue: the table "ip nat" is still used, even though all of the NAT-related chains were refactored into the vyos_nat table. The normal functionality of WLB isn't affected, because the mangle table isn't changed.

Effectively, there is no POSTROUTING jump to VYOS_PRE_SNAT_HOOK, so the packet is just routed out of an arbitrary interface. The resulting table(s) look like this on my system:

table ip vyos_nat {
        chain PREROUTING {
                type nat hook prerouting priority dstnat; policy accept;
                counter packets 5749356 bytes 1224292562 jump VYOS_PRE_DNAT_HOOK
        }

        chain POSTROUTING {
                type nat hook postrouting priority srcnat; policy accept;
                counter packets 873915 bytes 94757245 jump VYOS_PRE_SNAT_HOOK
                oifname "eth0" ip saddr 10.0.0.0/16 counter packets 34410 bytes 11982158 masquerade comment "SRC-NAT-10"
                oifname "eth4" counter packets 201007 bytes 19410208 masquerade comment "SRC-NAT-20"
                oifname "eth5" counter packets 51282 bytes 4848935 masquerade comment "SRC-NAT-30"
                oifname "eth6" counter packets 15438 bytes 1468415 masquerade comment "SRC-NAT-40"
        }

        chain VYOS_PRE_DNAT_HOOK {
                return
        }

        chain VYOS_PRE_SNAT_HOOK {
                return
        }
}

table ip nat {
        chain VYOS_PRE_SNAT_HOOK {
                type nat hook postrouting priority srcnat - 1; policy accept;
                counter packets 0 bytes 0 jump WANLOADBALANCE
                return
        }

        chain WANLOADBALANCE {
                ct mark 0xc9 counter packets 0 bytes 0 snat to 192.168.101.3
                ct mark 0xca counter packets 0 bytes 0 snat to 192.168.102.5
                ct mark 0xcb counter packets 0 bytes 0 snat to 192.168.103.195
        }
}

@Nova_Logic As a workaround, can you try the following configuration? It should ensure the behaviour you're after:

policy {
    local-route {
        rule 1 {
            fwmark 201
            inbound-interface eth0
            set {
                table 201
            }
        }
        rule 2 {
            fwmark 202
            inbound-interface eth0
            set {
                table 202
            }
        }
        rule 3 {
            fwmark 203
            inbound-interface eth0
            set {
                table 203
            }
        }
    }
}