litmus-hub/experiments/sock-shop-promProbe/experiment_cron.yaml

apiVersion: argoproj.io/v1alpha1
kind: CronWorkflow
metadata:
name: argowf-chaos-sock-shop-resiliency-cron-wf
namespace: litmus
labels:
subject: "{{workflow.parameters.appNamespace}}_sock-shop"
spec:
schedule: "0 * * * *"
concurrencyPolicy: "Forbid"
startingDeadlineSeconds: 0
workflowSpec:
entrypoint: argowf-chaos
serviceAccountName: argo-chaos
securityContext:
runAsUser: 1000
runAsNonRoot: true
arguments:
parameters:
- name: adminModeNamespace
value: "litmus"
- name: appNamespace
value: "sock-shop"
templates:
- name: argowf-chaos
steps:
- - name: install-application
template: install-application
- - name: install-chaos-faults
template: install-chaos-faults
- name: load-test
template: load-test
- - name: pod-cpu-hog
template: pod-cpu-hog
- - name: pod-delete
template: pod-delete
- - name: pod-network-loss
template: pod-network-loss
- - name: pod-memory-hog
template: pod-memory-hog
- - name: disk-fill
template: disk-fill
- - name: cleanup-chaos-resources
template: cleanup-chaos-resources
# - name: delete-application
# template: delete-application
- name: delete-loadtest
template: delete-loadtest
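          # Note on ordering: each "- -" entry above starts a new serial stage;
          # items that share a stage run in parallel (install-chaos-faults with
          # load-test, and cleanup-chaos-resources with delete-loadtest).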
- name: install-application
container:
image: litmuschaos/litmus-app-deployer:latest
args: ["-namespace=sock-shop","-typeName=resilient","-operation=apply","-timeout=400", "-app=sock-shop","-scope=cluster"] #for weak provide type flagName as resilient(-typeName=weak)
- name: install-chaos-faults
inputs:
artifacts:
- name: pod-cpu-hog-2sf
path: /tmp/pod-cpu-hog-2sf.yaml
raw:
data: |
apiVersion: litmuschaos.io/v1alpha1
description:
message: |
Injects CPU consumption on pods belonging to an app deployment
kind: ChaosExperiment
metadata:
name: pod-cpu-hog
labels:
name: pod-cpu-hog
app.kubernetes.io/part-of: litmus
app.kubernetes.io/component: chaosexperiment
app.kubernetes.io/version: latest
spec:
definition:
scope: Namespaced
permissions:
# Create and monitor the experiment & helper pods
- apiGroups: [""]
resources: ["pods"]
verbs:
[
"create",
"delete",
"get",
"list",
"patch",
"update",
"deletecollection",
]
# Performs CRUD operations on the events inside chaosengine and chaosresult
- apiGroups: [""]
resources: ["events"]
verbs: ["create", "get", "list", "patch", "update"]
                        # Fetch configmap details and mount them into the experiment pod (if specified)
- apiGroups: [""]
resources: ["configmaps"]
verbs: ["get", "list"]
# Track and get the runner, experiment, and helper pods log
- apiGroups: [""]
resources: ["pods/log"]
verbs: ["get", "list", "watch"]
                        # for creating pods/exec to execute commands inside the target container
- apiGroups: [""]
resources: ["pods/exec"]
verbs: ["get", "list", "create"]
                        # deriving the parent/owner details of the pod (if the parent is any of {deployment, statefulset, daemonset})
- apiGroups: ["apps"]
resources: ["deployments", "statefulsets", "replicasets", "daemonsets"]
verbs: ["list", "get"]
# deriving the parent/owner details of the pod(if parent is deploymentConfig)
- apiGroups: ["apps.openshift.io"]
resources: ["deploymentconfigs"]
verbs: ["list", "get"]
# deriving the parent/owner details of the pod(if parent is deploymentConfig)
- apiGroups: [""]
resources: ["replicationcontrollers"]
verbs: ["get", "list"]
# deriving the parent/owner details of the pod(if parent is argo-rollouts)
- apiGroups: ["argoproj.io"]
resources: ["rollouts"]
verbs: ["list", "get"]
                        # for configuring and monitoring the experiment job by the chaos-runner pod
- apiGroups: ["batch"]
resources: ["jobs"]
verbs: ["create", "list", "get", "delete", "deletecollection"]
# for creation, status polling and deletion of litmus chaos resources used within a chaos workflow
- apiGroups: ["litmuschaos.io"]
resources: ["chaosengines", "chaosexperiments", "chaosresults"]
verbs: ["create", "list", "get", "patch", "update", "delete"]
image: "litmuschaos/go-runner:latest"
imagePullPolicy: Always
args:
- -c
- ./experiments -name pod-cpu-hog
command:
- /bin/bash
env:
- name: TOTAL_CHAOS_DURATION
value: "60"
## Number of CPU cores to stress
- name: CPU_CORES
value: "1"
## LOAD CPU WITH GIVEN PERCENT LOADING FOR THE CPU STRESS WORKERS.
## 0 IS EFFECTIVELY A SLEEP (NO LOAD) AND 100 IS FULL LOADING
- name: CPU_LOAD
value: "100"
## Percentage of total pods to target
- name: PODS_AFFECTED_PERC
value: ""
## Period to wait before and after injection of chaos in sec
- name: RAMP_TIME
value: ""
## It is used in pumba lib only
- name: LIB_IMAGE
value: "litmuschaos/go-runner:latest"
## It is used in pumba lib only
- name: STRESS_IMAGE
value: "alexeiled/stress-ng:latest-ubuntu"
## provide the cluster runtime
- name: CONTAINER_RUNTIME
value: "containerd"
# provide the socket file path
- name: SOCKET_PATH
value: "/run/containerd/containerd.sock"
- name: TARGET_CONTAINER
value: ""
- name: TARGET_PODS
value: ""
- name: DEFAULT_HEALTH_CHECK
value: "false"
# To select pods on specific node(s)
- name: NODE_LABEL
value: ""
## it defines the sequence of chaos execution for multiple target pods
## supported values: serial, parallel
- name: SEQUENCE
value: "parallel"
labels:
name: pod-cpu-hog
app.kubernetes.io/part-of: litmus
app.kubernetes.io/component: experiment-job
app.kubernetes.io/runtime-api-usage: "true"
app.kubernetes.io/version: latest
- name: pod-delete-rm5
path: /tmp/pod-delete-rm5.yaml
raw:
data: |
apiVersion: litmuschaos.io/v1alpha1
description:
message: |
Deletes a pod belonging to a deployment/statefulset/daemonset
kind: ChaosExperiment
metadata:
name: pod-delete
labels:
name: pod-delete
app.kubernetes.io/part-of: litmus
app.kubernetes.io/component: chaosexperiment
app.kubernetes.io/version: latest
spec:
definition:
scope: Namespaced
permissions:
# Create and monitor the experiment & helper pods
- apiGroups: [""]
resources: ["pods"]
verbs:
[
"create",
"delete",
"get",
"list",
"patch",
"update",
"deletecollection",
]
# Performs CRUD operations on the events inside chaosengine and chaosresult
- apiGroups: [""]
resources: ["events"]
verbs: ["create", "get", "list", "patch", "update"]
                        # Fetch configmap details and mount them into the experiment pod (if specified)
- apiGroups: [""]
resources: ["configmaps"]
verbs: ["get", "list"]
# Track and get the runner, experiment, and helper pods log
- apiGroups: [""]
resources: ["pods/log"]
verbs: ["get", "list", "watch"]
                        # for creating pods/exec to execute commands inside the target container
- apiGroups: [""]
resources: ["pods/exec"]
verbs: ["get", "list", "create"]
                        # deriving the parent/owner details of the pod (if the parent is any of {deployment, statefulset, daemonset})
- apiGroups: ["apps"]
resources: ["deployments", "statefulsets", "replicasets", "daemonsets"]
verbs: ["list", "get"]
# deriving the parent/owner details of the pod(if parent is deploymentConfig)
- apiGroups: ["apps.openshift.io"]
resources: ["deploymentconfigs"]
verbs: ["list", "get"]
# deriving the parent/owner details of the pod(if parent is deploymentConfig)
- apiGroups: [""]
resources: ["replicationcontrollers"]
verbs: ["get", "list"]
# deriving the parent/owner details of the pod(if parent is argo-rollouts)
- apiGroups: ["argoproj.io"]
resources: ["rollouts"]
verbs: ["list", "get"]
                        # for configuring and monitoring the experiment job by the chaos-runner pod
- apiGroups: ["batch"]
resources: ["jobs"]
verbs: ["create", "list", "get", "delete", "deletecollection"]
# for creation, status polling and deletion of litmus chaos resources used within a chaos workflow
- apiGroups: ["litmuschaos.io"]
resources: ["chaosengines", "chaosexperiments", "chaosresults"]
verbs: ["create", "list", "get", "patch", "update", "delete"]
image: "litmuschaos/go-runner:latest"
imagePullPolicy: Always
args:
- -c
- ./experiments -name pod-delete
command:
- /bin/bash
env:
- name: TOTAL_CHAOS_DURATION
value: "15"
# Period to wait before and after injection of chaos in sec
- name: RAMP_TIME
value: ""
- name: FORCE
value: "true"
- name: CHAOS_INTERVAL
value: "5"
## percentage of total pods to target
- name: PODS_AFFECTED_PERC
value: ""
- name: TARGET_CONTAINER
value: ""
- name: TARGET_PODS
value: ""
- name: DEFAULT_HEALTH_CHECK
value: "false"
# To select pods on specific node(s)
- name: NODE_LABEL
value: ""
## it defines the sequence of chaos execution for multiple target pods
## supported values: serial, parallel
- name: SEQUENCE
value: "parallel"
labels:
name: pod-delete
app.kubernetes.io/part-of: litmus
app.kubernetes.io/component: experiment-job
app.kubernetes.io/version: latest
- name: pod-network-loss-x1w
path: /tmp/pod-network-loss-x1w.yaml
raw:
data: |
apiVersion: litmuschaos.io/v1alpha1
description:
message: |
Injects network packet loss on pods belonging to an app deployment
kind: ChaosExperiment
metadata:
name: pod-network-loss
labels:
name: pod-network-loss
app.kubernetes.io/part-of: litmus
app.kubernetes.io/component: chaosexperiment
app.kubernetes.io/version: latest
spec:
definition:
scope: Namespaced
permissions:
# Create and monitor the experiment & helper pods
- apiGroups: [""]
resources: ["pods"]
verbs:
[
"create",
"delete",
"get",
"list",
"patch",
"update",
"deletecollection",
]
# Performs CRUD operations on the events inside chaosengine and chaosresult
- apiGroups: [""]
resources: ["events"]
verbs: ["create", "get", "list", "patch", "update"]
                        # Fetch configmap details and mount them into the experiment pod (if specified)
- apiGroups: [""]
resources: ["configmaps"]
verbs: ["get", "list"]
# Track and get the runner, experiment, and helper pods log
- apiGroups: [""]
resources: ["pods/log"]
verbs: ["get", "list", "watch"]
                        # for creating pods/exec to execute commands inside the target container
- apiGroups: [""]
resources: ["pods/exec"]
verbs: ["get", "list", "create"]
                        # deriving the parent/owner details of the pod (if the parent is any of {deployment, statefulset, daemonset})
- apiGroups: ["apps"]
resources: ["deployments", "statefulsets", "replicasets", "daemonsets"]
verbs: ["list", "get"]
# deriving the parent/owner details of the pod(if parent is deploymentConfig)
- apiGroups: ["apps.openshift.io"]
resources: ["deploymentconfigs"]
verbs: ["list", "get"]
# deriving the parent/owner details of the pod(if parent is deploymentConfig)
- apiGroups: [""]
resources: ["replicationcontrollers"]
verbs: ["get", "list"]
# deriving the parent/owner details of the pod(if parent is argo-rollouts)
- apiGroups: ["argoproj.io"]
resources: ["rollouts"]
verbs: ["list", "get"]
                        # for configuring and monitoring the experiment job by the chaos-runner pod
- apiGroups: ["batch"]
resources: ["jobs"]
verbs: ["create", "list", "get", "delete", "deletecollection"]
# for creation, status polling and deletion of litmus chaos resources used within a chaos workflow
- apiGroups: ["litmuschaos.io"]
resources: ["chaosengines", "chaosexperiments", "chaosresults"]
verbs: ["create", "list", "get", "patch", "update", "delete"]
image: "litmuschaos/go-runner:latest"
imagePullPolicy: Always
args:
- -c
- ./experiments -name pod-network-loss
command:
- /bin/bash
env:
- name: TARGET_CONTAINER
value: ""
# provide lib image
- name: LIB_IMAGE
value: "litmuschaos/go-runner:latest"
- name: NETWORK_INTERFACE
value: "eth0"
- name: TC_IMAGE
value: "gaiadocker/iproute2"
- name: NETWORK_PACKET_LOSS_PERCENTAGE
value: "100" #in PERCENTAGE
- name: TOTAL_CHAOS_DURATION
value: "60" # in seconds
# time period to wait before and after injection of chaos in sec
- name: RAMP_TIME
value: ""
## percentage of total pods to target
- name: PODS_AFFECTED_PERC
value: ""
- name: DEFAULT_HEALTH_CHECK
value: "false"
- name: TARGET_PODS
value: ""
# To select pods on specific node(s)
- name: NODE_LABEL
value: ""
# provide the name of container runtime
# for litmus LIB, it supports docker, containerd, crio
# for pumba LIB, it supports docker only
- name: CONTAINER_RUNTIME
value: "containerd"
# provide the destination ips
# chaos injection will be triggered for these destination ips
- name: DESTINATION_IPS
value: ""
# provide the destination hosts
# chaos injection will be triggered for these destination hosts
- name: DESTINATION_HOSTS
value: ""
# provide the socket file path
- name: SOCKET_PATH
value: "/run/containerd/containerd.sock"
## it defines the sequence of chaos execution for multiple target pods
## supported values: serial, parallel
- name: SEQUENCE
value: "parallel"
labels:
name: pod-network-loss
app.kubernetes.io/part-of: litmus
app.kubernetes.io/component: experiment-job
app.kubernetes.io/runtime-api-usage: "true"
app.kubernetes.io/version: latest
- name: pod-memory-hog-49a
path: /tmp/pod-memory-hog-49a.yaml
raw:
data: |
apiVersion: litmuschaos.io/v1alpha1
description:
message: |
Injects memory consumption on pods belonging to an app deployment
kind: ChaosExperiment
metadata:
name: pod-memory-hog
labels:
name: pod-memory-hog
app.kubernetes.io/part-of: litmus
app.kubernetes.io/component: chaosexperiment
app.kubernetes.io/version: latest
spec:
definition:
scope: Namespaced
permissions:
# Create and monitor the experiment & helper pods
- apiGroups: [""]
resources: ["pods"]
verbs:
[
"create",
"delete",
"get",
"list",
"patch",
"update",
"deletecollection",
]
# Performs CRUD operations on the events inside chaosengine and chaosresult
- apiGroups: [""]
resources: ["events"]
verbs: ["create", "get", "list", "patch", "update"]
                        # Fetch configmap details and mount them into the experiment pod (if specified)
- apiGroups: [""]
resources: ["configmaps"]
verbs: ["get", "list"]
# Track and get the runner, experiment, and helper pods log
- apiGroups: [""]
resources: ["pods/log"]
verbs: ["get", "list", "watch"]
                        # for creating pods/exec to execute commands inside the target container
- apiGroups: [""]
resources: ["pods/exec"]
verbs: ["get", "list", "create"]
                        # deriving the parent/owner details of the pod (if the parent is any of {deployment, statefulset, daemonset})
- apiGroups: ["apps"]
resources: ["deployments", "statefulsets", "replicasets", "daemonsets"]
verbs: ["list", "get"]
# deriving the parent/owner details of the pod(if parent is deploymentConfig)
- apiGroups: ["apps.openshift.io"]
resources: ["deploymentconfigs"]
verbs: ["list", "get"]
# deriving the parent/owner details of the pod(if parent is deploymentConfig)
- apiGroups: [""]
resources: ["replicationcontrollers"]
verbs: ["get", "list"]
# deriving the parent/owner details of the pod(if parent is argo-rollouts)
- apiGroups: ["argoproj.io"]
resources: ["rollouts"]
verbs: ["list", "get"]
                        # for configuring and monitoring the experiment job by the chaos-runner pod
- apiGroups: ["batch"]
resources: ["jobs"]
verbs: ["create", "list", "get", "delete", "deletecollection"]
# for creation, status polling and deletion of litmus chaos resources used within a chaos workflow
- apiGroups: ["litmuschaos.io"]
resources: ["chaosengines", "chaosexperiments", "chaosresults"]
verbs: ["create", "list", "get", "patch", "update", "delete"]
image: "litmuschaos/go-runner:latest"
imagePullPolicy: Always
args:
- -c
- ./experiments -name pod-memory-hog
command:
- /bin/bash
env:
- name: TOTAL_CHAOS_DURATION
value: "60"
## enter the amount of memory in megabytes to be consumed by the application pod
- name: MEMORY_CONSUMPTION
value: "500"
## Number of workers to perform stress
- name: NUMBER_OF_WORKERS
value: "1"
## percentage of total pods to target
- name: PODS_AFFECTED_PERC
value: ""
## Period to wait before and after injection of chaos in sec
- name: RAMP_TIME
value: ""
## It is used in pumba lib only
- name: LIB_IMAGE
value: "litmuschaos/go-runner:latest"
## It is used in pumba lib only
- name: STRESS_IMAGE
value: "alexeiled/stress-ng:latest-ubuntu"
## provide the cluster runtime
- name: CONTAINER_RUNTIME
value: "containerd"
# provide the socket file path
- name: SOCKET_PATH
value: "/run/containerd/containerd.sock"
## it defines the sequence of chaos execution for multiple target pods
## supported values: serial, parallel
- name: SEQUENCE
value: "parallel"
- name: DEFAULT_HEALTH_CHECK
value: "false"
- name: TARGET_CONTAINER
value: ""
- name: TARGET_PODS
value: ""
# To select pods on specific node(s)
- name: NODE_LABEL
value: ""
labels:
name: pod-memory-hog
app.kubernetes.io/part-of: litmus
app.kubernetes.io/component: experiment-job
app.kubernetes.io/runtime-api-usage: "true"
app.kubernetes.io/version: latest
- name: disk-fill-h7d
path: /tmp/disk-fill-h7d.yaml
raw:
data: |
apiVersion: litmuschaos.io/v1alpha1
description:
message: |
                      Fills up the ephemeral storage of a resource
kind: ChaosExperiment
metadata:
name: disk-fill
labels:
name: disk-fill
app.kubernetes.io/part-of: litmus
app.kubernetes.io/component: chaosexperiment
app.kubernetes.io/version: latest
spec:
definition:
scope: Namespaced
permissions:
# Create and monitor the experiment & helper pods
- apiGroups: [""]
resources: ["pods"]
verbs:
[
"create",
"delete",
"get",
"list",
"patch",
"update",
"deletecollection",
]
# Performs CRUD operations on the events inside chaosengine and chaosresult
- apiGroups: [""]
resources: ["events"]
verbs: ["create", "get", "list", "patch", "update"]
                        # Fetch configmap details and mount them into the experiment pod (if specified)
- apiGroups: [""]
resources: ["configmaps"]
verbs: ["get", "list"]
# Track and get the runner, experiment, and helper pods log
- apiGroups: [""]
resources: ["pods/log"]
verbs: ["get", "list", "watch"]
                        # for creating pods/exec to execute commands inside the target container
- apiGroups: [""]
resources: ["pods/exec"]
verbs: ["get", "list", "create"]
                        # deriving the parent/owner details of the pod (if the parent is any of {deployment, statefulset, daemonset})
- apiGroups: ["apps"]
resources: ["deployments", "statefulsets", "replicasets", "daemonsets"]
verbs: ["list", "get"]
# deriving the parent/owner details of the pod(if parent is deploymentConfig)
- apiGroups: ["apps.openshift.io"]
resources: ["deploymentconfigs"]
verbs: ["list", "get"]
# deriving the parent/owner details of the pod(if parent is deploymentConfig)
- apiGroups: [""]
resources: ["replicationcontrollers"]
verbs: ["get", "list"]
# deriving the parent/owner details of the pod(if parent is argo-rollouts)
- apiGroups: ["argoproj.io"]
resources: ["rollouts"]
verbs: ["list", "get"]
                        # for configuring and monitoring the experiment job by the chaos-runner pod
- apiGroups: ["batch"]
resources: ["jobs"]
verbs: ["create", "list", "get", "delete", "deletecollection"]
# for creation, status polling and deletion of litmus chaos resources used within a chaos workflow
- apiGroups: ["litmuschaos.io"]
resources: ["chaosengines", "chaosexperiments", "chaosresults"]
verbs: ["create", "list", "get", "patch", "update", "delete"]
image: "litmuschaos/go-runner:latest"
imagePullPolicy: Always
args:
- -c
- ./experiments -name disk-fill
command:
- /bin/bash
env:
- name: TARGET_CONTAINER
value: ""
- name: FILL_PERCENTAGE
value: "80"
- name: TOTAL_CHAOS_DURATION
value: "60"
# Period to wait before and after injection of chaos in sec
- name: RAMP_TIME
value: ""
# provide the data block size
# supported unit is KB
- name: DATA_BLOCK_SIZE
value: "256"
- name: TARGET_PODS
value: ""
- name: EPHEMERAL_STORAGE_MEBIBYTES
value: ""
# To select pods on specific node(s)
- name: NODE_LABEL
value: ""
## percentage of total pods to target
- name: PODS_AFFECTED_PERC
value: ""
- name: DEFAULT_HEALTH_CHECK
value: "false"
- name: LIB_IMAGE
value: "litmuschaos/go-runner:latest"
# Provide the container runtime path
# Default set to docker container path
- name: CONTAINER_PATH
value: "/var/lib/docker/containers"
## it defines the sequence of chaos execution for multiple target pods
## supported values: serial, parallel
- name: SEQUENCE
value: "parallel"
labels:
name: disk-fill
app.kubernetes.io/part-of: litmus
app.kubernetes.io/component: experiment-job
app.kubernetes.io/host-path-usage: "true"
app.kubernetes.io/version: latest
outputs: {}
metadata: {}
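        # every fault manifest written to /tmp by the artifacts above is
        # applied into the admin-mode namespace in one kubectl call below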
container:
name: ""
image: litmuschaos/k8s:latest
command:
- sh
- "-c"
args:
- kubectl apply -f /tmp/ -n {{workflow.parameters.adminModeNamespace}} && sleep 30
resources: {}
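      # The fault templates below all follow the same pattern: the
      # litmus-checker image applies the raw ChaosEngine manifest and waits
      # for its verdict. Each engine pairs a Continuous httpProbe (front-end
      # reachability) with an Edge promProbe that compares the rate of
      # HTTP 200 responses reported by Prometheus against a threshold.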
- name: pod-cpu-hog
inputs:
artifacts:
- name: pod-cpu-hog
path: /tmp/chaosengine.yaml
raw:
data: |
apiVersion: litmuschaos.io/v1alpha1
kind: ChaosEngine
metadata:
name: pod-cpu-hog-chaos
namespace: {{workflow.parameters.adminModeNamespace}}
labels:
context: "{{workflow.parameters.appNamespace}}_carts"
annotations: {}
spec:
appinfo:
appns: 'sock-shop'
applabel: 'name=carts'
appkind: 'deployment'
jobCleanUpPolicy: retain
engineState: 'active'
chaosServiceAccount: litmus-admin
experiments:
- name: pod-cpu-hog
spec:
probe:
- name: "check-frontend-access-url"
type: "httpProbe"
httpProbe/inputs:
url: "http://front-end.sock-shop.svc.cluster.local:80"
insecureSkipVerify: false
responseTimeout: 100
method:
get:
criteria: "=="
responseCode: "200"
mode: "Continuous"
runProperties:
probeTimeout: 1s
interval: 100ms
attempt: 2
probePollingInterval: 1s
- name: "check-probe-success"
type: "promProbe"
promProbe/inputs:
endpoint: "http://prometheus-k8s.monitoring.svc.cluster.local:9090"
query: "sum(rate(request_duration_seconds_count{job='sock-shop/front-end',route='/',status_code='200'}[20s]))*100"
comparator:
criteria: ">=" #supports >=,<=,>,<,==,!= comparision
value: "100"
mode: "Edge"
runProperties:
probeTimeout: 1s
interval: 100ms
attempt: 2
components:
env:
#number of cpu cores to be consumed
#verify the resources the app has been launched with
- name: CPU_CORES
value: '1'
- name: TOTAL_CHAOS_DURATION
value: '30' # in seconds
- name: CHAOS_KILL_COMMAND
value: "kill -9 $(ps afx | grep \"[md5sum] /dev/zero\" | awk '{print$1}' | tr '\n' ' ')"
container:
image: litmuschaos/litmus-checker:latest
args: ["-file=/tmp/chaosengine.yaml","-saveName=/tmp/engine-name"]
- name: pod-memory-hog
inputs:
artifacts:
- name: pod-memory-hog
path: /tmp/chaosengine.yaml
raw:
data: |
apiVersion: litmuschaos.io/v1alpha1
kind: ChaosEngine
metadata:
name: pod-memory-hog-chaos
namespace: {{workflow.parameters.adminModeNamespace}}
labels:
context: "{{workflow.parameters.appNamespace}}_orders"
annotations: {}
spec:
appinfo:
appns: 'sock-shop'
applabel: 'name=orders'
appkind: 'deployment'
jobCleanUpPolicy: retain
engineState: 'active'
chaosServiceAccount: litmus-admin
experiments:
- name: pod-memory-hog
spec:
probe:
- name: "check-frontend-access-url"
type: "httpProbe"
httpProbe/inputs:
url: "http://front-end.sock-shop.svc.cluster.local:80"
insecureSkipVerify: false
responseTimeout: 100
method:
get:
criteria: "=="
responseCode: "200"
mode: "Continuous"
runProperties:
probeTimeout: 1s
interval: 100ms
attempt: 2
probePollingInterval: 1s
- name: "check-probe-success"
type: "promProbe"
promProbe/inputs:
endpoint: "http://prometheus-k8s.monitoring.svc.cluster.local:9090"
query: "sum(rate(request_duration_seconds_count{job='sock-shop/front-end',route='/',status_code='200'}[20s]))*100"
comparator:
criteria: ">=" #supports >=,<=,>,<,==,!= comparision
value: "100"
mode: "Edge"
runProperties:
probeTimeout: 1s
interval: 100ms
attempt: 2
components:
env:
- name: MEMORY_CONSUMPTION
value: '500'
- name: TOTAL_CHAOS_DURATION
value: '30' # in seconds
- name: CHAOS_KILL_COMMAND
value: "kill -9 $(ps afx | grep \"[dd] if /dev/zero\" | awk '{print $1}' | tr '\n' ' ')"
container:
image: litmuschaos/litmus-checker:latest
args: ["-file=/tmp/chaosengine.yaml","-saveName=/tmp/engine-name"]
- name: pod-delete
inputs:
artifacts:
- name: pod-delete
path: /tmp/chaosengine.yaml
raw:
data: |
apiVersion: litmuschaos.io/v1alpha1
kind: ChaosEngine
metadata:
name: catalogue-pod-delete-chaos
namespace: {{workflow.parameters.adminModeNamespace}}
labels:
context: "{{workflow.env.appNamespace}}_catalogue"
annotations: {}
spec:
appinfo:
appns: 'sock-shop'
applabel: 'name=catalogue'
appkind: 'deployment'
engineState: 'active'
chaosServiceAccount: litmus-admin
jobCleanUpPolicy: 'retain'
components:
runner:
imagePullPolicy: Always
experiments:
- name: pod-delete
spec:
probe:
- name: "check-catalogue-access-url"
type: "httpProbe"
httpProbe/inputs:
url: "http://front-end.sock-shop.svc.cluster.local:80/catalogue"
insecureSkipVerify: false
responseTimeout: 100
method:
get:
criteria: "=="
responseCode: "200"
mode: "Continuous"
runProperties:
probeTimeout: 1s
interval: 100ms
attempt: 3
probePollingInterval: 1s
- name: "check-probe-success"
type: "promProbe"
promProbe/inputs:
endpoint: "http://prometheus-k8s.monitoring.svc.cluster.local:9090"
query: "sum(rate(request_duration_seconds_count{job='sock-shop/front-end',route='/',status_code='200'}[20s]))*100"
comparator:
criteria: ">=" #supports >=,<=,>,<,==,!= comparision
value: "100"
mode: "Edge"
runProperties:
probeTimeout: 1s
interval: 100ms
attempt: 2
components:
env:
- name: TOTAL_CHAOS_DURATION
value: '30'
# set chaos interval (in sec) as desired
- name: CHAOS_INTERVAL
value: '10'
# pod failures without '--force' & default terminationGracePeriodSeconds
- name: FORCE
value: 'false'
container:
image: litmuschaos/litmus-checker:latest
args: ["-file=/tmp/chaosengine.yaml","-saveName=/tmp/engine-name"]
- name: pod-network-loss
inputs:
artifacts:
- name: pod-network-loss
path: /tmp/chaosengine.yaml
raw:
data: |
apiVersion: litmuschaos.io/v1alpha1
kind: ChaosEngine
metadata:
name: pod-network-loss-chaos
namespace: {{workflow.parameters.adminModeNamespace}}
labels:
context: "{{workflow.parameters.appNamespace}}_user-db"
annotations: {}
spec:
appinfo:
appns: 'sock-shop'
applabel: 'name=user-db'
appkind: 'statefulset'
jobCleanUpPolicy: retain
engineState: 'active'
auxiliaryAppInfo: ''
chaosServiceAccount: litmus-admin
components:
runner:
imagePullPolicy: Always
experiments:
- name: pod-network-loss
spec:
probe:
- name: "check-cards-access-url"
type: "httpProbe"
httpProbe/inputs:
url: "http://front-end.sock-shop.svc.cluster.local:80/cards"
insecureSkipVerify: false
responseTimeout: 100
method:
get:
criteria: "=="
responseCode: "200"
mode: "Continuous"
runProperties:
probeTimeout: 1s
interval: 100ms
attempt: 3
probePollingInterval: 1s
- name: "check-probe-success"
type: "promProbe"
promProbe/inputs:
endpoint: "http://prometheus-k8s.monitoring.svc.cluster.local:9090"
query: "sum(rate(request_duration_seconds_count{job='sock-shop/front-end',route='/',status_code='200'}[20s]))*100"
comparator:
criteria: ">=" #supports >=,<=,>,<,==,!= comparision
value: "100"
mode: "Edge"
runProperties:
probeTimeout: 1s
interval: 100ms
attempt: 2
components:
env:
- name: TOTAL_CHAOS_DURATION
value: '30'
- name: NETWORK_INTERFACE
value: 'eth0'
- name: NETWORK_PACKET_LOSS_PERCENTAGE
value: '100'
- name: CONTAINER_RUNTIME
value: 'containerd'
- name: SOCKET_PATH
value: '/run/containerd/containerd.sock'
container:
image: litmuschaos/litmus-checker:latest
args: ["-file=/tmp/chaosengine.yaml","-saveName=/tmp/engine-name"]
- name: disk-fill
inputs:
artifacts:
- name: disk-fill
path: /tmp/chaosengine.yaml
raw:
data: |
apiVersion: litmuschaos.io/v1alpha1
kind: ChaosEngine
metadata:
name: catalogue-disk-fill
namespace: {{workflow.parameters.adminModeNamespace}}
labels:
context: "{{workflow.parameters.appNamespace}}_catalogue-db"
annotations: {}
spec:
appinfo:
appns: 'sock-shop'
applabel: 'name=catalogue-db'
appkind: 'statefulset'
engineState: 'active'
chaosServiceAccount: litmus-admin
jobCleanUpPolicy: 'retain'
components:
runner:
imagePullPolicy: Always
experiments:
- name: disk-fill
spec:
probe:
- name: "check-catalogue-db-cr-status"
type: "k8sProbe"
k8sProbe/inputs:
group: ""
version: "v1"
resource: "pods"
namespace: "sock-shop"
fieldSelector: "status.phase=Running"
labelSelector: "name=catalogue-db"
operation: "present"
mode: "Continuous"
runProperties:
probeTimeout: 1s
interval: 100ms
attempt: 1
probePollingInterval: 1s
- name: "check-probe-success"
type: "promProbe"
promProbe/inputs:
endpoint: "http://prometheus-k8s.monitoring.svc.cluster.local:9090"
query: "sum(rate(request_duration_seconds_count{job='sock-shop/front-end',route='/',status_code='200'}[20s]))*100"
comparator:
criteria: ">=" #supports >=,<=,>,<,==,!= comparision
value: "100"
mode: "Edge"
runProperties:
probeTimeout: 1s
interval: 100ms
attempt: 2
components:
env:
- name: FILL_PERCENTAGE
value: '100'
- name: TARGET_CONTAINER
value: ''
- name: TOTAL_CHAOS_DURATION
value: '30'
container:
image: litmuschaos/litmus-checker:latest
args: ["-file=/tmp/chaosengine.yaml","-saveName=/tmp/engine-name"]
- name: delete-application
container:
image: litmuschaos/litmus-app-deployer:latest
args: ["-namespace=sock-shop","-typeName=resilient","-operation=delete", "-app=sock-shop"]
- name: load-test
container:
image: litmuschaos/litmus-app-deployer:latest
args: ["-namespace=loadtest", "-app=loadtest"]
- name: delete-loadtest
container:
image: litmuschaos/litmus-app-deployer:latest
args: ["-namespace=loadtest","-operation=delete", "-app=loadtest"]
- name: cleanup-chaos-resources
container:
image: litmuschaos/k8s:latest
command: [sh, -c]
args:
[
"kubectl delete chaosengine pod-memory-hog-chaos pod-cpu-hog-chaos catalogue-pod-delete-chaos pod-network-loss-chaos -n {{workflow.parameters.adminModeNamespace}}",
]
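
# Usage sketch (not part of the manifest above): assuming Argo Workflows and
# Litmus are installed and the litmus/sock-shop namespaces exist, the cron
# workflow can be registered and observed with standard kubectl commands:
#
#   kubectl apply -f experiment_cron.yaml
#   kubectl get cronworkflows -n litmus
#   kubectl get chaosengines,chaosresults -n litmus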