Hello,
I have a really big problem. I have investigated it a lot, but I don't know where to look next.
My problem:
Randomly, when I call my ingresses, I get a 503.
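For context, a request against the ingress looks roughly like this (the Host header comes from the ingress below; the exact client invocation and node address are illustrative placeholders):

# hypothetical reproduction; <node-ip> is any cluster node running the controller
curl -v -H 'Host: galleries.k8s.com' http://<node-ip>/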
My ingress configuration:
apiVersion: extensions/v1beta1
kind: Ingress
metadata:
  annotations:
    kubectl.kubernetes.io/last-applied-configuration: |
      {"apiVersion":"networking.k8s.io/v1beta1","kind":"Ingress","metadata":{"annotations":{},"name":"galleries","namespace":"default"},"spec":{"rules":[{"host":"galleries.k8s.com","http":{"paths":[{"backend":{"serviceName":"galleries","servicePort":8080},"path":"/"}]}}]}}
  creationTimestamp: "2019-07-16T09:47:51Z"
  generation: 1
  name: galleries
  namespace: default
  resourceVersion: "537902"
  selfLink: /apis/extensions/v1beta1/namespaces/default/ingresses/galleries
  uid: 41d08268-3b85-429a-82c8-a8aecaaffa1d
spec:
  rules:
  - host: galleries.k8s.com
    http:
      paths:
      - backend:
          serviceName: galleries
          servicePort: 8080
        path: /
status:
  loadBalancer: {}
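The backend service and its endpoints behind this ingress can be checked with (names taken from the manifest above):

kubectl get svc galleries -n default
kubectl get endpoints galleries -n default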
My haproxy configmap:
apiVersion: v1
data:
  check: enabled
  dynamic-scaling: "true"
  forwarded-for: enabled
  load-balance: roundrobin
  maxconn: "2000"
  nbthread: "1"
  rate-limit: "OFF"
  rate-limit-expire: 30m
  rate-limit-interval: 10s
  rate-limit-size: 100k
  servers-increment: "8"
  servers-increment-max-disabled: "66"
  ssl-numproc: "0"
  ssl-redirect: "OFF"
  syslog-endpoint: 10.109.129.58:514
  timeout-client: 50s
  timeout-connect: 5s
  timeout-http-keep-alive: 1m
  timeout-http-request: 5s
  timeout-queue: 5s
  timeout-server: 50s
  timeout-tunnel: 1h
kind: ConfigMap
metadata:
  annotations:
    kubectl.kubernetes.io/last-applied-configuration: |
      {"apiVersion":"v1","data":{"check":"enabled","forwarded-for":"enabled","load-balance":"roundrobin","maxconn":"2000","nbthread":"1","rate-limit":"OFF","rate-limit-expire":"30m","rate-limit-interval":"10s","rate-limit-size":"100k","servers-increment":"42","servers-increment-max-disabled":"66","ssl-certificate":"default/tls-secret","ssl-numproc":"1","ssl-redirect":"OFF","ssl-redirect-code":"302","timeout-client":"50s","timeout-connect":"5s","timeout-http-keep-alive":"1m","timeout-http-request":"5s","timeout-queue":"5s","timeout-server":"50s","timeout-tunnel":"1h"},"kind":"ConfigMap","metadata":{"annotations":{},"name":"haproxy-configmap","namespace":"default","resourceVersion":"539852"}}
  creationTimestamp: "2019-07-16T11:49:16Z"
  name: haproxy-configmap
  namespace: default
  resourceVersion: "2474693"
  selfLink: /api/v1/namespaces/default/configmaps/haproxy-configmap
  uid: 89cbe998-f611-4bdd-a95c-4eb4f6499bbf
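As a sanity check that the controller actually loads this ConfigMap (it is referenced as --configmap=default/haproxy-configmap in the deployment below), its logs can be tailed with:

# picks one pod of the deployment; --tail limits output
kubectl logs -n haproxy-controller deploy/haproxy-ingress --tail=50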
My haproxy deployment:
root@k8s-qa001:~# kubectl get deploy haproxy-ingress -o yaml
Error from server (NotFound): deployments.extensions "haproxy-ingress" not found
root@k8s-qa001:~# kubectl get deploy haproxy-ingress -o yaml -n haproxy-controller
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
  annotations:
    deployment.kubernetes.io/revision: "2"
    kubectl.kubernetes.io/last-applied-configuration: |
      {"apiVersion":"apps/v1","kind":"Deployment","metadata":{"annotations":{},"labels":{"run":"haproxy-ingress"},"name":"haproxy-ingress","namespace":"haproxy-controller"},"spec":{"replicas":3,"selector":{"matchLabels":{"run":"haproxy-ingress"}},"template":{"metadata":{"labels":{"run":"haproxy-ingress"}},"spec":{"containers":[{"args":["--configmap=default/haproxy-configmap","--default-backend-service=$(POD_NAMESPACE)/ingress-default-backend"],"env":[{"name":"POD_NAME","valueFrom":{"fieldRef":{"fieldPath":"metadata.name"}}},{"name":"POD_NAMESPACE","valueFrom":{"fieldRef":{"fieldPath":"metadata.namespace"}}}],"image":"haproxytech/kubernetes-ingress","livenessProbe":{"httpGet":{"path":"/healthz","port":1042}},"name":"haproxy-ingress","ports":[{"containerPort":80,"name":"http"},{"containerPort":443,"name":"https"},{"containerPort":1024,"name":"stat"}],"resources":{"requests":{"cpu":"500m","memory":"50Mi"}}}],"serviceAccountName":"haproxy-ingress-service-account"}}}}
  creationTimestamp: "2019-07-26T15:32:06Z"
  generation: 20
  labels:
    run: haproxy-ingress
  name: haproxy-ingress
  namespace: haproxy-controller
  resourceVersion: "2484985"
  selfLink: /apis/extensions/v1beta1/namespaces/haproxy-controller/deployments/haproxy-ingress
  uid: bf077817-cf37-45f6-9db7-4ea8162db899
spec:
  progressDeadlineSeconds: 600
  replicas: 3
  revisionHistoryLimit: 10
  selector:
    matchLabels:
      run: haproxy-ingress
  strategy:
    rollingUpdate:
      maxSurge: 25%
      maxUnavailable: 25%
    type: RollingUpdate
  template:
    metadata:
      creationTimestamp: null
      labels:
        run: haproxy-ingress
    spec:
      containers:
      - args:
        - --configmap=default/haproxy-configmap
        - --default-backend-service=$(POD_NAMESPACE)/ingress-default-backend
        env:
        - name: POD_NAME
          valueFrom:
            fieldRef:
              apiVersion: v1
              fieldPath: metadata.name
        - name: POD_NAMESPACE
          valueFrom:
            fieldRef:
              apiVersion: v1
              fieldPath: metadata.namespace
        image: haproxytech/kubernetes-ingress
        imagePullPolicy: Always
        livenessProbe:
          failureThreshold: 3
          httpGet:
            path: /healthz
            port: 1042
            scheme: HTTP
          periodSeconds: 10
          successThreshold: 1
          timeoutSeconds: 1
        name: haproxy-ingress
        ports:
        - containerPort: 80
          name: http
          protocol: TCP
        - containerPort: 443
          name: https
          protocol: TCP
        - containerPort: 1024
          name: stat
          protocol: TCP
        resources:
          requests:
            cpu: 500m
            memory: 50Mi
        securityContext:
          capabilities:
            add:
            - SYS_PTRACE
        terminationMessagePath: /dev/termination-log
        terminationMessagePolicy: File
      dnsPolicy: ClusterFirst
      restartPolicy: Always
      schedulerName: default-scheduler
      securityContext: {}
      serviceAccount: haproxy-ingress-service-account
      serviceAccountName: haproxy-ingress-service-account
      terminationGracePeriodSeconds: 30
status:
  availableReplicas: 3
  conditions:
  - lastTransitionTime: "2019-07-26T15:32:06Z"
    lastUpdateTime: "2019-07-30T13:42:50Z"
    message: ReplicaSet "haproxy-ingress-6bd9fbdf6" has successfully progressed.
    reason: NewReplicaSetAvailable
    status: "True"
    type: Progressing
  - lastTransitionTime: "2019-07-30T14:56:30Z"
    lastUpdateTime: "2019-07-30T14:56:30Z"
    message: Deployment has minimum availability.
    reason: MinimumReplicasAvailable
    status: "True"
    type: Available
  observedGeneration: 20
  readyReplicas: 3
  replicas: 3
  updatedReplicas: 3
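Since the failures correlate with which node receives the traffic, the node placement of the three controller replicas can be listed with (label taken from the deployment above):

kubectl get pods -n haproxy-controller -l run=haproxy-ingress -o wide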
My investigations:
1 - The problem happens ONLY when I scale the haproxy-controller pods.
2 - The problem happens ONLY when the haproxy service sends the request to an haproxy pod on a node other than the one I am calling. (I have already tried setting externalTrafficPolicy to Local and to Cluster; in any case, that should only matter for NodePort access, not for the ingress, right?) A sketch of both checks follows the iptables output below.
I can prove it by watching which iptables rules the packets match:
Chain KUBE-SVC-POGEE3ZVCPTG4ZOO (8 references)
pkts bytes target                    prot opt in out source    destination
0    0     KUBE-SEP-UHN4AYUCBJBZCMCX all  --  *  *   0.0.0.0/0 0.0.0.0/0  statistic mode random probability 0.33332999982  <- other node => HTTP 503 NOK
0    0     KUBE-SEP-DXIK5R47GRH46BRL all  --  *  *   0.0.0.0/0 0.0.0.0/0  statistic mode random probability 0.50000000000  <- other node => HTTP 503 NOK
0    0     KUBE-SEP-3ZVFJIVQPV4GJPB7 all  --  *  *   0.0.0.0/0 0.0.0.0/0  <- pod on the node where I call the ingress => HTTP 200 OK
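A minimal sketch of the commands behind these two checks (the chain name comes from the output above; the Service name in the patch is an assumption, and externalTrafficPolicy only applies to NodePort/LoadBalancer traffic):

# watch per-rule packet counters in the nat table while sending requests
iptables -t nat -nvL KUBE-SVC-POGEE3ZVCPTG4ZOO

# flip externalTrafficPolicy on the controller's Service (service name is an assumption)
kubectl -n haproxy-controller patch svc haproxy-ingress -p '{"spec":{"externalTrafficPolicy":"Local"}}'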
3 - Packets can reach the other node's haproxy controller.
Using tcpdump inside the haproxy pod on the other node, I can see that this pod receives the request (the source is SNATed here because externalTrafficPolicy is Cluster):
15:21:40.869646 IP (tos 0x0, ttl 62, id 21860, offset 0, flags [DF], proto TCP (6), length 60)
192.168.88.192.34824 > 192.168.167.125.80: Flags [S], cksum 0x614c (correct), seq 2332571645, win 29200, options [mss 1460,sackOK,TS val 1159553689 ecr 0,nop,wscale 7], length 0
0x0000: 4500 003c 5564 4000 3e06 65c9 c0a8 58c0 E..<Ud@.>.e...X.
0x0010: c0a8 a77d 8808 0050 8b08 37fd 0000 0000 ...}...P..7.....
0x0020: a002 7210 614c 0000 0204 05b4 0402 080a ..r.aL..........
0x0030: 451d 6299 0000 0000 0103 0307 E.b.........
15:21:40.869671 IP (tos 0x0, ttl 64, id 0, offset 0, flags [DF], proto TCP (6), length 60)
192.168.167.125.80 > 192.168.88.192.34824: Flags [S.], cksum 0x81bd (incorrect -> 0x308c), seq 2402364309, ack 2332571646, win 27760, options [mss 1400,sackOK,TS val 275540824 ecr 1159553689,nop,wscale 7], length 0
0x0000: 4500 003c 0000 4000 4006 b92d c0a8 a77d E..<..@[email protected]...}
0x0010: c0a8 58c0 0050 8808 8f31 2b95 8b08 37fe ..X..P...1+...7.
0x0020: a012 6c70 81bd 0000 0204 0578 0402 080a ..lp.......x....
0x0030: 106c 6b58 451d 6299 0103 0307 .lkXE.b.....
15:21:41.892087 IP (tos 0x0, ttl 64, id 0, offset 0, flags [DF], proto TCP (6), length 60)
192.168.167.125.80 > 192.168.88.192.34824: Flags [S.], cksum 0x81bd (incorrect -> 0x2c8d), seq 2402364309, ack 2332571646, win 27760, options [mss 1400,sackOK,TS val 275541847 ecr 1159553689,nop,wscale 7], length 0
0x0000: 4500 003c 0000 4000 4006 b92d c0a8 a77d E..<..@[email protected]...}
0x0010: c0a8 58c0 0050 8808 8f31 2b95 8b08 37fe ..X..P...1+...7.
0x0020: a012 6c70 81bd 0000 0204 0578 0402 080a ..lp.......x....
0x0030: 106c 6f57 451d 6299 0103 0307 .loWE.b.....
15:21:43.908068 IP (tos 0x0, ttl 64, id 0, offset 0, flags [DF], proto TCP (6), length 60)
192.168.167.125.80 > 192.168.88.192.34824: Flags [S.], cksum 0x81bd (incorrect -> 0x24ad), seq 2402364309, ack 2332571646, win 27760, options [mss 1400,sackOK,TS val 275543863 ecr 1159553689,nop,wscale 7], length 0
0x0000: 4500 003c 0000 4000 4006 b92d c0a8 a77d E..<..@[email protected]...}
0x0010: c0a8 58c0 0050 8808 8f31 2b95 8b08 37fe ..X..P...1+...7.
0x0020: a012 6c70 81bd 0000 0204 0578 0402 080a ..lp.......x....
0x0030: 106c 7737 451d 6299 0103 0307 .lw7E.b.....
15:21:47.940069 IP (tos 0x0, ttl 64, id 0, offset 0, flags [DF], proto TCP (6), length 60)
192.168.167.125.80 > 192.168.88.192.34824: Flags [S.], cksum 0x81bd (incorrect -> 0x14ed), seq 2402364309, ack 2332571646, win 27760, options [mss 1400,sackOK,TS val 275547895 ecr 1159553689,nop,wscale 7], length 0
0x0000: 4500 003c 0000 4000 4006 b92d c0a8 a77d E..<..@[email protected]...}
0x0010: c0a8 58c0 0050 8808 8f31 2b95 8b08 37fe ..X..P...1+...7.
0x0020: a012 6c70 81bd 0000 0204 0578 0402 080a ..lp.......x....
0x0030: 106c 86f7 451d 6299 0103 0307 .l..E.b.....
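For reference, a capture like the one above can be reproduced inside the haproxy pod with something like this (interface name and filter are assumptions):

# -nn: no name resolution, -vv: verbose IP headers, -X: hex/ASCII payload dump
tcpdump -i eth0 -nn -vv -X 'tcp port 80 and host 192.168.88.192'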
4 - The haproxy process can't see the request.
Note in the capture above that the SYN-ACK from 192.168.167.125 is retransmitted with backoff and never acknowledged, so the TCP handshake never completes.
Let's strace the process during the requests:
/ # strace -p 387 2>&1 | tee /tmp/trace
Let's analyze the trace after the request.
I can see haproxy's health checks:
/ # grep 192.168.167.108 /tmp/trace
connect(15, {sa_family=AF_INET, sin_port=htons(9000), sin_addr=inet_addr("192.168.167.108")}, 16) = -1 EINPROGRESS (Operation in progress)
connect(15, {sa_family=AF_INET, sin_port=htons(9000), sin_addr=inet_addr("192.168.167.108")}, 16) = 0
connect(15, {sa_family=AF_INET, sin_port=htons(9000), sin_addr=inet_addr("192.168.167.108")}, 16) = -1 EINPROGRESS (Operation in progress)
connect(15, {sa_family=AF_INET, sin_port=htons(9000), sin_addr=inet_addr("192.168.167.108")}, 16) = 0
connect(15, {sa_family=AF_INET, sin_port=htons(9000), sin_addr=inet_addr("192.168.167.108")}, 16) = -1 EINPROGRESS (Operation in progress)
connect(15, {sa_family=AF_INET, sin_port=htons(9000), sin_addr=inet_addr("192.168.167.108")}, 16) = 0
connect(15, {sa_family=AF_INET, sin_port=htons(9000), sin_addr=inet_addr("192.168.167.108")}, 16) = -1 EINPROGRESS (Operation in progress)
connect(15, {sa_family=AF_INET, sin_port=htons(9000), sin_addr=inet_addr("192.168.167.108")}, 16) = 0
connect(15, {sa_family=AF_INET, sin_port=htons(9000), sin_addr=inet_addr("192.168.167.108")}, 16) = -1 EINPROGRESS (Operation in progress)
connect(15, {sa_family=AF_INET, sin_port=htons(9000), sin_addr=inet_addr("192.168.167.108")}, 16) = 0
connect(15, {sa_family=AF_INET, sin_port=htons(9000), sin_addr=inet_addr("192.168.167.108")}, 16) = -1 EINPROGRESS (Operation in progress)
connect(15, {sa_family=AF_INET, sin_port=htons(9000), sin_addr=inet_addr("192.168.167.108")}, 16) = 0
BUT I can't find any connection from my remote node's IP:
/ # grep 192.168.88.192 /tmp/trace
/ #
Can someone help me?