dpvs nat模式
生活随笔
收集整理的這篇文章主要介紹了
dpvs nat模式
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
NAT模式限制
-
NAT為雙臂模式
-
拓撲實例
- 如上圖所示,各節點的IP地址如下:
- client: 192.168.0.46
- VIP: 192.168.0.89
- DPVS local ip: 192.168.0.66, 10.140.31.48
- RS1: 10.140.18.33
- RS2: 10.140.18.34
- 可見,所有的IP地址都在一個局域網內
- 如上圖所示,各節點的IP地址如下:
-
NAT模式約束
- DPVS-NAT模式只能在單個lcore中工作。由于以下原因,dpvs很難支持多lcore NAT轉發模式
- DPVS會話條目通過RSS在lcore上進行拆分和分發
- NAT轉發要求正、反向流量都通過DPVS
- NAT轉發只轉換dest IP/端口,不更改源IP/端口
- NIC的fdir規則設置有限
- 因此,如果沒有對流量的其他控制,則出站數據包可能到達與入站數據包不同的lcore。如果是,出站數據包將被丟棄,因為會話查找未命中
- FNAT通過使用Flow Director(FDIR)解決了這個問題。但是,對于NIC,可以添加的規則非常有限,例如,對于XT-540,可以添加8k,與FNAT不同,NAT沒有(本地ip,端口),因此只能在(源ip,端口)上設置FDIR規則,這意味著只支持數千個并發。因此,FDIR不適用于NAT
- 注意:從v1.7.2開始,就為多lcore NAT模式轉發提供了解決方案。其原理是通過全局重定向表和一些無鎖環將出站數據包重定向到其會話項所在的正確的lcore。當然,這在一定程度上會損害性能。如果要使用它,請在/etc/dpvs.conf中打開配置開關“ipvs_defs/conn/redirect”
- DPVS-NAT模式只能在單個lcore中工作。由于以上原因,dpvs很難支持多lcore NAT轉發模式
NAT模式原理
-
對于inbound方向的流量,實際上做的是dnat,將目標ip由lb ip轉換成真正的rs ip,此時后端rs是能拿到client ip的。outbound方向的流量做snat,將源地址換成lb ip
-
三層處理ipv4_rcv
- 由于nat不做syn_proxy,所以直接看dp_vs_in
-
dp_vs_conn_bind_dest
/*
 * Bind a connection to its destination (article excerpt: code is elided at
 * the "..." markers, so only the NAT-relevant part is visible).
 *
 * In the part shown, the destination's forwarding mode selects the transmit
 * callbacks stored on the connection. For DPVS_FWD_MODE_NAT these are
 * dp_vs_xmit_nat (packet_xmit) and dp_vs_out_xmit_nat (packet_out_xmit).
 * The destination is then recorded in conn->dest and EDPVS_OK is returned.
 */
static int dp_vs_conn_bind_dest(struct dp_vs_conn *conn,struct dp_vs_dest *dest) {
    ...
    switch (dest->fwdmode){
    case DPVS_FWD_MODE_NAT:
        /* NAT mode: install the NAT transmit handlers for both callbacks. */
        conn->packet_xmit = dp_vs_xmit_nat;
        conn->packet_out_xmit = dp_vs_out_xmit_nat;
        break;
    ...
    }
    /* Remember which destination this connection is bound to. */
    conn->dest = dest;
    return(EDPVS_OK);
}
inbound方向處理
-
dp_vs_xmit_nat
/**
 * Address-family dispatcher for NAT-mode transmission.
 *
 * Reads the connection's address family and hands the packet to the matching
 * IPv4 or IPv6 worker, returning that worker's result unchanged.
 *
 * @param proto  transport-layer protocol descriptor (per the original note,
 *               tcp_proto for TCP)
 * @param conn   connection the packet belongs to
 * @param mbuf   packet received in the inbound direction
 * @return       whatever __dp_vs_xmit_nat4() or __dp_vs_xmit_nat6() returns
 */
int dp_vs_xmit_nat(struct dp_vs_proto *proto,
                   struct dp_vs_conn *conn,
                   struct rte_mbuf *mbuf)
{
    const int family = conn->af;

    /* Only IPv4 and IPv6 connections are expected here. */
    assert(family == AF_INET || family == AF_INET6);

    if (family == AF_INET) {
        return __dp_vs_xmit_nat4(proto, conn, mbuf);
    }

    return __dp_vs_xmit_nat6(proto, conn, mbuf);
}
__dp_vs_xmit_nat4
/*** nat模式下,inbound流量處理,主要做dnat,將目的ip由DPVS轉換成真正的RS ip,此時后端RS是能夠拿到正確的client ip的* 參數說明:* proto: 傳輸層dp_vs_proto結構,對于tcp協議為tcp_proto* conn: 對應的連接* mbuf: inbound接收的mbuf*/ static int __dp_vs_xmit_nat4(struct dp_vs_proto *proto,struct dp_vs_conn *conn,struct rte_mbuf *mbuf) {struct flow4 fl4;struct ipv4_hdr * iph = ip4_hdr(mbuf);struct route_entry *rt;int err, mtu;if (!fast_xmit_close && !(conn->flags & DPVS_CONN_F_NOFASTXMIT)){dp_vs_save_xmit_info(mbuf, proto, conn);if (!dp_vs_fast_xmit_nat(proto, conn, mbuf)){return(EDPVS_OK);}}/** drop old route. just for safe, because* NAT is PREROUTING, should not have route.*///釋放old路由緩存信息if (unlikely(mbuf->userdata != NULL)){RTE_LOG(WARNING, IPVS, "%s: NAT have route %p ?\\n",__func__, mbuf->userdata);route4_put((struct route_entry *)mbuf->userdata);}//查找出口路由,此處查找時fl4_daddr設置成RS的daddr,因為需要dpvs將mbuf中目的ip轉換成真正的rs ipmemset(&fl4, 0, sizeof(struct flow4));fl4.fl4_daddr = conn->daddr.in;//nat模式下client ip不變fl4.fl4_saddr = conn->caddr.in;fl4.fl4_tos = iph->type_of_service;rt = route4_output(&fl4);//如果未找到出口路由,則丟棄數據報返回if (!rt){err = EDPVS_NOROUTE;goto errout;}//通過出口路由設置conn緩存字段dp_vs_conn_cache_rt(conn, rt, true);mtu = rt->mtu;if (mbuf->pkt_len > mtu &&(iph->fragment_offset & htons(IPV4_HDR_DF_FLAG))){RTE_LOG(DEBUG, IPVS, "%s: frag needed.\\n", __func__);icmp_send(mbuf, ICMP_DEST_UNREACH, ICMP_UNREACH_NEEDFRAG,htonl(rt->mtu));err = EDPVS_FRAG;goto errout;}//設置mbuf的路由緩存項mbuf->userdata = rt;/* after route lookup and before translation *///遞減ttl,如果遞減后ttl=0,則丟棄數據報,并通過icmp通知錯誤if (xmit_ttl){if (unlikely(iph->time_to_live <= 1)){icmp_send(mbuf, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0);err = EDPVS_DROP;goto errout;}iph->time_to_live--;}/* L3 translation before l4 re-csum *///清零ip層chksum,因為ip層首部字段會發生變化,需要重新計算校驗和iph->hdr_checksum = 0;//將數據報中目的ip地址替換為后端rs地址iph->dst_addr = conn->daddr.in.s_addr;/* L4 NAT translation *///執行nat_in_handler,tcp中為tcp_snat_in_handlerif (proto->nat_in_handler){err = proto->nat_in_handler(proto, conn, mbuf);if (err != EDPVS_OK){goto 
errout;}}//如果網卡支持硬件校驗和,則設置校驗和為0由硬件計算,否則軟件計算校驗和if (likely(mbuf->ol_flags & PKT_TX_IP_CKSUM)){iph->hdr_checksum = 0;}else{ip4_send_csum(iph);}//回調 INET_HOOK_LOCAL_OUT 鏈注冊的回調,dpvs中目前沒有注冊hook函數,所以實際就是調用ipv4_output,發送mbufreturn(INET_HOOK(AF_INET, INET_HOOK_LOCAL_OUT, mbuf,NULL, rt->port, ipv4_output));errout:if (rt){route4_put(rt);}rte_pktmbuf_free(mbuf);return(err); } -
tcp_snat_in_handler
- 主要執行tcp層校驗和計算
- 由于nat模式下會修改ip首部源地址或者目的地址,tcp首部中偽首部會發生變化,導致tcp校驗和會發生變化
outbound方向處理
-
dp_vs_out_xmit_nat
//包裹函數,根據協議族執行不同的內部函數 int dp_vs_out_xmit_nat(struct dp_vs_proto *proto,struct dp_vs_conn *conn,struct rte_mbuf *mbuf) {int af = conn->af;assert(af == AF_INET || af == AF_INET6);return(af == AF_INET ? __dp_vs_out_xmit_nat4(proto, conn, mbuf): __dp_vs_out_xmit_nat6(proto, conn, mbuf)); } -
__dp_vs_out_xmit_nat4
/*** nat模式下,outbound方向傳輸函數,流量做snat,將源地址換成lb ip* proto: 傳輸層協議實例* conn: 連接信息* mbuf: 從RS server--->LB的數據報,需要做snat后傳輸至外部客戶端*/ static int __dp_vs_out_xmit_nat4(struct dp_vs_proto *proto,struct dp_vs_conn *conn,struct rte_mbuf *mbuf) {struct flow4 fl4;//iph指向mbuf中ip首部struct ipv4_hdr * iph = ip4_hdr(mbuf);struct route_entry *rt;int err, mtu;if (!fast_xmit_close && !(conn->flags & DPVS_CONN_F_NOFASTXMIT)){dp_vs_save_outxmit_info(mbuf, proto, conn);if (!dp_vs_fast_outxmit_nat(proto, conn, mbuf)){return(EDPVS_OK);}}/** drop old route. just for safe, because* NAT is PREROUTING, should not have route.*///mbuf->userdata中存放路由緩存項if (unlikely(mbuf->userdata != NULL)){RTE_LOG(WARNING, IPVS, "%s: NAT have route %p ?\\n",__func__, mbuf->userdata);route4_put((struct route_entry *)mbuf->userdata);}//查找出口路由,目的地址為client ip,源地址更改為LB vipmemset(&fl4, 0, sizeof(struct flow4));fl4.fl4_daddr = conn->caddr.in;fl4.fl4_saddr = conn->vaddr.in;fl4.fl4_tos = iph->type_of_service;rt = route4_output(&fl4);if (!rt){err = EDPVS_NOROUTE;goto errout;}//根據rt信息更新conn cachedp_vs_conn_cache_rt(conn, rt, false);//如果數據報長度超過出口路由的mtu,并且數據報中有DF標記,則丟棄數據報,并通過icmp回復錯誤消息mtu = rt->mtu;if (mbuf->pkt_len > mtu &&(iph->fragment_offset & htons(IPV4_HDR_DF_FLAG))){RTE_LOG(DEBUG, IPVS, "%s: frag needed.\\n", __func__);icmp_send(mbuf, ICMP_DEST_UNREACH, ICMP_UNREACH_NEEDFRAG,htonl(rt->mtu));err = EDPVS_FRAG;goto errout;}//更新mbuf的路由緩存項mbuf->userdata = rt;/* after route lookup and before translation *///遞減ttl并做出錯處理if (xmit_ttl){if (unlikely(iph->time_to_live <= 1)){icmp_send(mbuf, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0);err = EDPVS_DROP;goto errout;}iph->time_to_live--;}/* L3 translation before l4 re-csum *///ip首部校驗和清零,snat會修改ip首部源ip地址iph->hdr_checksum = 0;//源ip地址更改為vip地址iph->src_addr = conn->vaddr.in.s_addr;/* L4 NAT translation *///執行nat_out_handler,主要重新計算傳輸層的校驗和,tcp中為tcp_snat_out_handlerif (proto->nat_out_handler){err = proto->nat_out_handler(proto, conn, mbuf);if (err != EDPVS_OK){goto errout;}}//重新計算IP層校驗和if 
(likely(mbuf->ol_flags & PKT_TX_IP_CKSUM)){iph->hdr_checksum = 0;}else{ip4_send_csum(iph);}//INET_HOOK_LOCAL_OUT處暫時沒有hook_ops,等于直接調用ipv4_outputreturn(INET_HOOK(AF_INET, INET_HOOK_LOCAL_OUT, mbuf,NULL, rt->port, ipv4_output));errout:if (rt){route4_put(rt);}rte_pktmbuf_free(mbuf);return(err); } -
tcp_snat_out_handler
//snat中重新計算tcp首部校驗和 static int tcp_snat_out_handler(struct dp_vs_proto *proto,struct dp_vs_conn *conn, struct rte_mbuf *mbuf) {struct tcphdr *th;int af = conn->af;int iphdrlen = ((AF_INET6 == af) ? ip6_hdrlen(mbuf) : ip4_hdrlen(mbuf));if (mbuf_may_pull(mbuf, iphdrlen + sizeof(*th)) != 0){return(EDPVS_INVPKT);}th = tcp_hdr(mbuf);if (unlikely(!th)){return(EDPVS_INVPKT);}if (mbuf_may_pull(mbuf, iphdrlen + (th->doff << 2)) != 0){return(EDPVS_INVPKT);}/* L4 translation */th->source = conn->vport;/* L4 re-checksum */return(tcp_send_csum(af, iphdrlen, th, conn, mbuf)); }
nat 模式配置實例
-
拓撲,如開頭
-
配置
-
DPVS configs
## DPVS configs ##
# config LAN network on bond0, routes will generate automatically
./dpip addr add 192.168.0.66/24 dev bond0
./dpip addr add 10.140.31.48/20 dev bond0

# add service <VIP:vport> to forwarding, scheduling mode is RR
./ipvsadm -A -t 192.168.0.89:80 -s rr

# add two real servers to the service, forwarding mode is NAT (-m)
./ipvsadm -a -t 192.168.0.89:80 -r 10.140.18.33 -m
./ipvsadm -a -t 192.168.0.89:80 -r 10.140.18.34 -m

# add VIP and the route will generate automatically
./dpip addr add 192.168.0.89/32 dev bond0
keepalived configs
# Addresses configured on the DPVS host.
static_ipaddress {
    192.168.0.66/24 dev bond0
    10.140.31.48/20 dev bond0
}

# The VIP:port served by the group.
virtual_server_group vip_nat {
    192.168.0.89 80
}

# TCP service in NAT forwarding mode with round-robin scheduling.
virtual_server group vip_nat {
    protocol tcp
    lb_algo rr
    lb_kind NAT

    real_server 10.140.18.33 80 {
        weight 100
        inhibit_on_failure
        TCP_CHECK {
            nb_sock_retry 2
            connect_timeout 3
            connect_port 80
        }
    }

    real_server 10.140.18.34 80 {
        weight 100
        inhibit_on_failure
        TCP_CHECK {
            nb_sock_retry 2
            connect_timeout 3
            connect_port 80
        }
    }
}
On RSs, back routes should be pointed to DPVS
## for each real server
# send replies for the client network back through the DPVS local address
ip route add 192.168.0.0/24 via 10.140.31.48 dev eth0
-
-
Now you can test DPVS NAT mode
client$ curl 192.168.0.89:80
Hi, I am 10.140.18.33
client$ curl 192.168.0.89:80
Hi, I am 10.140.18.34
總結
以上是生活随笔為你收集整理的dpvs nat模式的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: U盘格式化后能恢复数据吗?U盘删除的数据
- 下一篇: 用计算机用u盘怎么切换,U盘一插进电脑提