---
# Argo CronWorkflow that runs a Litmus chaos suite against the sock-shop
# demo application.
#
# Flow (see the "argowf-chaos" entrypoint template):
#   1. install sock-shop
#   2. install the generic chaos experiments and start the load test (parallel)
#   3. run pod-cpu-hog, pod-delete, pod-network-loss, pod-memory-hog and
#      disk-fill one after another
#   4. delete the ChaosEngines and the load test (parallel)
#
# Each chaos template ships its ChaosEngine manifest as a raw input artifact
# written to /tmp/chaosengine.yaml and hands that file to the litmus-checker
# image.
apiVersion: argoproj.io/v1alpha1
kind: CronWorkflow
metadata:
  generateName: argowf-chaos-sock-shop-resiliency-cron-wf-
  namespace: litmus
spec:
  # Cron expression: minute 0 of every hour.
  schedule: "0 * * * *"
  concurrencyPolicy: "Forbid"
  startingDeadlineSeconds: 0
  workflowSpec:
    entrypoint: argowf-chaos
    serviceAccountName: argo-chaos
    securityContext:
      runAsUser: 1000
      runAsNonRoot: true
    arguments:
      parameters:
        # Namespace the chaos experiments and ChaosEngines are created in.
        - name: adminModeNamespace
          value: "litmus"
    templates:
      # Entrypoint. Each outer list item is a sequential step group; items
      # inside one group run in parallel.
      - name: argowf-chaos
        steps:
          - - name: install-application
              template: install-application
          - - name: install-chaos-experiments
              template: install-chaos-experiments
            - name: load-test
              template: load-test
          - - name: pod-cpu-hog
              template: pod-cpu-hog
          - - name: pod-delete
              template: pod-delete
          - - name: pod-network-loss
              template: pod-network-loss
          - - name: pod-memory-hog
              template: pod-memory-hog
          - - name: disk-fill
              template: disk-fill
          - - name: revert-chaos
              template: revert-chaos
            # - name: delete-application
            #   template: delete-application
            - name: delete-loadtest
              template: delete-loadtest

      # Deploys sock-shop into the "sock-shop" namespace.
      # To deploy the weak variant of the application instead, pass
      # -typeName=weak.
      - name: install-application
        container:
          image: litmuschaos/litmus-app-deployer:latest
          args:
            - "-namespace=sock-shop"
            - "-typeName=resilient"
            - "-operation=apply"
            - "-timeout=400"
            - "-app=sock-shop"
            - "-scope=cluster"

      # Applies the generic experiment definitions from the Litmus hub into
      # the admin-mode namespace, then sleeps for 30 seconds.
      - name: install-chaos-experiments
        container:
          image: litmuschaos/k8s:latest
          command: ["sh", "-c"]
          args:
            - >-
              kubectl apply
              -f https://hub.litmuschaos.io/api/chaos/master?file=charts/generic/experiments.yaml
              -n {{workflow.parameters.adminModeNamespace}}
              ; sleep 30

      # pod-cpu-hog against the "carts" deployment.
      - name: pod-cpu-hog
        inputs:
          artifacts:
            - name: pod-cpu-hog
              path: /tmp/chaosengine.yaml
              raw:
                data: |
                  apiVersion: litmuschaos.io/v1alpha1
                  kind: ChaosEngine
                  metadata:
                    name: pod-cpu-hog-chaos
                    namespace: {{workflow.parameters.adminModeNamespace}}
                  spec:
                    appinfo:
                      appns: 'sock-shop'
                      applabel: 'name=carts'
                      appkind: 'deployment'
                    jobCleanUpPolicy: retain
                    engineState: 'active'
                    chaosServiceAccount: litmus-admin
                    experiments:
                      - name: pod-cpu-hog
                        spec:
                          probe:
                            - name: "check-frontend-access-url"
                              type: "httpProbe"
                              httpProbe/inputs:
                                url: "http://front-end.sock-shop.svc.cluster.local:80"
                                insecureSkipVerify: false
                                responseTimeout: 100
                                method:
                                  get:
                                    criteria: "=="
                                    responseCode: "200"
                              mode: "Continuous"
                              runProperties:
                                probeTimeout: 2
                                interval: 1
                                retry: 2
                                probePollingInterval: 1
                            - name: "check-probe-success"
                              type: "promProbe"
                              promProbe/inputs:
                                endpoint: "http://prometheus-k8s.monitoring.svc.cluster.local:9090"
                                query: "sum(rate(request_duration_seconds_count{job='sock-shop/front-end',route='/',status_code='200'}[20s]))*100"
                                comparator:
                                  criteria: ">=" #supports >=,<=,>,<,==,!= comparision
                                  value: "100"
                              mode: "Edge"
                              runProperties:
                                probeTimeout: 2
                                interval: 1
                                retry: 2
                          components:
                            env:
                              #number of cpu cores to be consumed
                              #verify the resources the app has been launched with
                              - name: CPU_CORES
                                value: '1'
                              - name: TOTAL_CHAOS_DURATION
                                value: '30' # in seconds
                              - name: CHAOS_KILL_COMMAND
                                value: "kill -9 $(ps afx | grep \"[md5sum] /dev/zero\" | awk '{print$1}' | tr '\n' ' ')"
        container:
          image: litmuschaos/litmus-checker:latest
          args:
            - "-file=/tmp/chaosengine.yaml"
            - "-saveName=/tmp/engine-name"

      # pod-memory-hog against the "orders" deployment.
      - name: pod-memory-hog
        inputs:
          artifacts:
            - name: pod-memory-hog
              path: /tmp/chaosengine.yaml
              raw:
                data: |
                  apiVersion: litmuschaos.io/v1alpha1
                  kind: ChaosEngine
                  metadata:
                    name: pod-memory-hog-chaos
                    namespace: {{workflow.parameters.adminModeNamespace}}
                  spec:
                    appinfo:
                      appns: 'sock-shop'
                      applabel: 'name=orders'
                      appkind: 'deployment'
                    jobCleanUpPolicy: retain
                    engineState: 'active'
                    chaosServiceAccount: litmus-admin
                    experiments:
                      - name: pod-memory-hog
                        spec:
                          probe:
                            - name: "check-frontend-access-url"
                              type: "httpProbe"
                              httpProbe/inputs:
                                url: "http://front-end.sock-shop.svc.cluster.local:80"
                                insecureSkipVerify: false
                                responseTimeout: 100
                                method:
                                  get:
                                    criteria: "=="
                                    responseCode: "200"
                              mode: "Continuous"
                              runProperties:
                                probeTimeout: 2
                                interval: 1
                                retry: 2
                                probePollingInterval: 1
                            - name: "check-probe-success"
                              type: "promProbe"
                              promProbe/inputs:
                                endpoint: "http://prometheus-k8s.monitoring.svc.cluster.local:9090"
                                query: "sum(rate(request_duration_seconds_count{job='sock-shop/front-end',route='/',status_code='200'}[20s]))*100"
                                comparator:
                                  criteria: ">=" #supports >=,<=,>,<,==,!= comparision
                                  value: "100"
                              mode: "Edge"
                              runProperties:
                                probeTimeout: 2
                                interval: 1
                                retry: 2
                          components:
                            env:
                              - name: MEMORY_CONSUMPTION
                                value: '500'
                              - name: TOTAL_CHAOS_DURATION
                                value: '30' # in seconds
                              - name: CHAOS_KILL_COMMAND
                                value: "kill -9 $(ps afx | grep \"[dd] if /dev/zero\" | awk '{print $1}' | tr '\n' ' ')"
        container:
          image: litmuschaos/litmus-checker:latest
          args:
            - "-file=/tmp/chaosengine.yaml"
            - "-saveName=/tmp/engine-name"

      # pod-delete against the "catalogue" deployment.
      - name: pod-delete
        inputs:
          artifacts:
            - name: pod-delete
              path: /tmp/chaosengine.yaml
              raw:
                data: |
                  apiVersion: litmuschaos.io/v1alpha1
                  kind: ChaosEngine
                  metadata:
                    name: catalogue-pod-delete-chaos
                    namespace: {{workflow.parameters.adminModeNamespace}}
                  spec:
                    appinfo:
                      appns: 'sock-shop'
                      applabel: 'name=catalogue'
                      appkind: 'deployment'
                    engineState: 'active'
                    chaosServiceAccount: litmus-admin
                    jobCleanUpPolicy: 'retain'
                    components:
                      runner:
                        imagePullPolicy: Always
                    experiments:
                      - name: pod-delete
                        spec:
                          probe:
                            - name: "check-catalogue-access-url"
                              type: "httpProbe"
                              httpProbe/inputs:
                                url: "http://front-end.sock-shop.svc.cluster.local:80/catalogue"
                                insecureSkipVerify: false
                                responseTimeout: 100
                                method:
                                  get:
                                    criteria: "=="
                                    responseCode: "200"
                              mode: "Continuous"
                              runProperties:
                                probeTimeout: 12
                                interval: 12
                                retry: 3
                                probePollingInterval: 1
                            - name: "check-probe-success"
                              type: "promProbe"
                              promProbe/inputs:
                                endpoint: "http://prometheus-k8s.monitoring.svc.cluster.local:9090"
                                query: "sum(rate(request_duration_seconds_count{job='sock-shop/front-end',route='/',status_code='200'}[20s]))*100"
                                comparator:
                                  criteria: ">=" #supports >=,<=,>,<,==,!= comparision
                                  value: "100"
                              mode: "Edge"
                              runProperties:
                                probeTimeout: 2
                                interval: 1
                                retry: 2
                          components:
                            env:
                              - name: TOTAL_CHAOS_DURATION
                                value: '30'
                              # set chaos interval (in sec) as desired
                              - name: CHAOS_INTERVAL
                                value: '10'
                              # pod failures without '--force' & default terminationGracePeriodSeconds
                              - name: FORCE
                                value: 'false'
        container:
          image: litmuschaos/litmus-checker:latest
          args:
            - "-file=/tmp/chaosengine.yaml"
            - "-saveName=/tmp/engine-name"

      # pod-network-loss against the "user-db" statefulset.
      # NOTE(review): CONTAINER_RUNTIME/SOCKET_PATH are set for docker;
      # confirm they match the runtime of the target cluster.
      - name: pod-network-loss
        inputs:
          artifacts:
            - name: pod-network-loss
              path: /tmp/chaosengine.yaml
              raw:
                data: |
                  apiVersion: litmuschaos.io/v1alpha1
                  kind: ChaosEngine
                  metadata:
                    name: pod-network-loss-chaos
                    namespace: {{workflow.parameters.adminModeNamespace}}
                  spec:
                    appinfo:
                      appns: 'sock-shop'
                      applabel: 'name=user-db'
                      appkind: 'statefulset'
                    jobCleanUpPolicy: retain
                    engineState: 'active'
                    auxiliaryAppInfo: ''
                    chaosServiceAccount: litmus-admin
                    components:
                      runner:
                        imagePullPolicy: Always
                    experiments:
                      - name: pod-network-loss
                        spec:
                          probe:
                            - name: "check-cards-access-url"
                              type: "httpProbe"
                              httpProbe/inputs:
                                url: "http://front-end.sock-shop.svc.cluster.local:80/cards"
                                insecureSkipVerify: false
                                responseTimeout: 100
                                method:
                                  get:
                                    criteria: "=="
                                    responseCode: "200"
                              mode: "Continuous"
                              runProperties:
                                probeTimeout: 12
                                interval: 12
                                retry: 3
                                probePollingInterval: 1
                            - name: "check-probe-success"
                              type: "promProbe"
                              promProbe/inputs:
                                endpoint: "http://prometheus-k8s.monitoring.svc.cluster.local:9090"
                                query: "sum(rate(request_duration_seconds_count{job='sock-shop/front-end',route='/',status_code='200'}[20s]))*100"
                                comparator:
                                  criteria: ">=" #supports >=,<=,>,<,==,!= comparision
                                  value: "100"
                              mode: "Edge"
                              runProperties:
                                probeTimeout: 2
                                interval: 1
                                retry: 2
                          components:
                            env:
                              - name: TOTAL_CHAOS_DURATION
                                value: '30'
                              - name: NETWORK_INTERFACE
                                value: 'eth0'
                              - name: NETWORK_PACKET_LOSS_PERCENTAGE
                                value: '100'
                              - name: CONTAINER_RUNTIME
                                value: 'docker'
                              - name: SOCKET_PATH
                                value: '/var/run/docker.sock'
        container:
          image: litmuschaos/litmus-checker:latest
          args:
            - "-file=/tmp/chaosengine.yaml"
            - "-saveName=/tmp/engine-name"

      # disk-fill against the "catalogue-db" statefulset.
      - name: disk-fill
        inputs:
          artifacts:
            - name: disk-fill
              path: /tmp/chaosengine.yaml
              raw:
                data: |
                  apiVersion: litmuschaos.io/v1alpha1
                  kind: ChaosEngine
                  metadata:
                    name: catalogue-disk-fill
                    namespace: {{workflow.parameters.adminModeNamespace}}
                  spec:
                    appinfo:
                      appns: 'sock-shop'
                      applabel: 'name=catalogue-db'
                      appkind: 'statefulset'
                    engineState: 'active'
                    chaosServiceAccount: litmus-admin
                    jobCleanUpPolicy: 'retain'
                    components:
                      runner:
                        imagePullPolicy: Always
                    experiments:
                      - name: disk-fill
                        spec:
                          probe:
                            - name: "check-catalogue-db-cr-status"
                              type: "k8sProbe"
                              k8sProbe/inputs:
                                group: ""
                                version: "v1"
                                resource: "pods"
                                namespace: "sock-shop"
                                fieldSelector: "status.phase=Running"
                                labelSelector: "name=catalogue-db"
                                operation: "present"
                              mode: "Continuous"
                              runProperties:
                                probeTimeout: 1
                                interval: 1
                                retry: 1
                                probePollingInterval: 1
                            - name: "check-probe-success"
                              type: "promProbe"
                              promProbe/inputs:
                                endpoint: "http://prometheus-k8s.monitoring.svc.cluster.local:9090"
                                query: "sum(rate(request_duration_seconds_count{job='sock-shop/front-end',route='/',status_code='200'}[20s]))*100"
                                comparator:
                                  criteria: ">=" #supports >=,<=,>,<,==,!= comparision
                                  value: "100"
                              mode: "Edge"
                              runProperties:
                                probeTimeout: 2
                                interval: 1
                                retry: 2
                          components:
                            env:
                              - name: FILL_PERCENTAGE
                                value: '100'
                              - name: TARGET_CONTAINER
                                value: ''
                              - name: TOTAL_CHAOS_DURATION
                                value: '30'
        container:
          image: litmuschaos/litmus-checker:latest
          args:
            - "-file=/tmp/chaosengine.yaml"
            - "-saveName=/tmp/engine-name"

      # Removes sock-shop. Not referenced by the entrypoint at the moment
      # (its step is commented out there).
      - name: delete-application
        container:
          image: litmuschaos/litmus-app-deployer:latest
          args:
            - "-namespace=sock-shop"
            - "-typeName=resilient"
            - "-operation=delete"
            - "-app=sock-shop"

      # Deploys the load generator into the "loadtest" namespace.
      - name: load-test
        container:
          image: litmuschaos/litmus-app-deployer:latest
          args:
            - "-namespace=loadtest"
            - "-app=loadtest"

      # Removes the load generator.
      - name: delete-loadtest
        container:
          image: litmuschaos/litmus-app-deployer:latest
          args:
            - "-namespace=loadtest"
            - "-operation=delete"
            - "-app=loadtest"

      # Deletes every ChaosEngine created by the experiment templates above.
      # catalogue-disk-fill (created by the disk-fill template) is part of the
      # list so that no engine is left behind between scheduled runs.
      - name: revert-chaos
        container:
          image: litmuschaos/k8s:latest
          command: ["sh", "-c"]
          args:
            - >-
              kubectl delete chaosengine
              pod-memory-hog-chaos
              pod-cpu-hog-chaos
              catalogue-pod-delete-chaos
              pod-network-loss-chaos
              catalogue-disk-fill
              -n {{workflow.parameters.adminModeNamespace}}