feat: Add a rds-instance-stop chaos fault (#635)

* feat: Add a rds-instance-stop chaos fault

Signed-off-by: Jongwoo Han <jongwooo.han@gmail.com>

---------

Signed-off-by: Jongwoo Han <jongwooo.han@gmail.com>
This commit is contained in:
Jongwoo Han
2025-04-24 16:24:44 +09:00
committed by GitHub
parent 160f11c322
commit 44ddb75e96
7 changed files with 281 additions and 0 deletions

View File

@@ -34,6 +34,9 @@ spec:
- name: ebs-loss-by-tag
description: It detach the EBS volume from AWS EC2 instance identified by Tag for a certain chaos duration.
displayName: "EBS Loss By Tag"
- name: rds-instance-stop
description: It stops the RDS instance identified by instance identifier.
displayName: "RDS Instance Stop"
keywords:
- AWS
maintainers:

View File

@@ -22,3 +22,6 @@ faults:
- name: ebs-loss-by-tag
CSV: ebs-loss-by-tag.chartserviceversion.yaml
desc: "ebs-loss-by-tag"
- name: rds-instance-stop
CSV: rds-instance-stop.chartserviceversion.yaml
desc: "rds-instance-stop"

View File

@@ -779,3 +779,107 @@ spec:
mountPath: /tmp/
---
---
# ChaosExperiment for the rds-instance-stop fault: stops an RDS instance
# identified by its instance identifier.
apiVersion: litmuschaos.io/v1alpha1
description:
  message: |
    Stopping an RDS instance identified by instance identifier.
kind: ChaosExperiment
metadata:
  name: rds-instance-stop
  labels:
    name: rds-instance-stop
    app.kubernetes.io/part-of: litmus
    app.kubernetes.io/component: chaosexperiment
    app.kubernetes.io/version: ci
spec:
  definition:
    scope: Cluster
    permissions:
      # Create and monitor the experiment & helper pods
      - apiGroups: [""]
        resources: ["pods"]
        verbs:
          [
            "create",
            "delete",
            "get",
            "list",
            "patch",
            "update",
            "deletecollection",
          ]
      # Perform CRUD operations on the events inside chaosengine and chaosresult
      - apiGroups: [""]
        resources: ["events"]
        verbs: ["create", "get", "list", "patch", "update"]
      # Fetch configmaps & secrets details and mount them to the experiment pod (if specified)
      - apiGroups: [""]
        resources: ["secrets", "configmaps"]
        verbs: ["get", "list"]
      # Track and get the runner, experiment, and helper pods log
      - apiGroups: [""]
        resources: ["pods/log"]
        verbs: ["get", "list", "watch"]
      # Create and manage command execution inside the target container
      - apiGroups: [""]
        resources: ["pods/exec"]
        verbs: ["get", "list", "create"]
      # Configure and monitor the experiment job from the chaos-runner pod
      - apiGroups: ["batch"]
        resources: ["jobs"]
        verbs: ["create", "list", "get", "delete", "deletecollection"]
      # Create, poll the status of, and delete litmus chaos resources used within a chaos workflow
      - apiGroups: ["litmuschaos.io"]
        resources: ["chaosengines", "chaosexperiments", "chaosresults"]
        verbs: ["create", "list", "get", "patch", "update", "delete"]
      # Allow the experiment to perform node status checks
      - apiGroups: [""]
        resources: ["nodes"]
        verbs: ["get", "list"]
    image: "litmuschaos.docker.scarf.sh/litmuschaos/go-runner:latest"
    imagePullPolicy: Always
    # Runs as: /bin/bash -c "./experiments -name rds-instance-stop"
    args:
      - -c
      - ./experiments -name rds-instance-stop
    command:
      - /bin/bash
    env:
      # Chaos duration in sec
      - name: TOTAL_CHAOS_DURATION
        value: "30"
      # Interval duration in sec
      - name: CHAOS_INTERVAL
        value: "30"
      # Period to wait before and after injection of chaos in sec
      - name: RAMP_TIME
        value: ""
      # Provide the RDS instance identifier
      # ex: rds-demo-instance-1,rds-demo-instance-2
      - name: RDS_INSTANCE_IDENTIFIER
        value: ""
      # Provide the region name of the instance
      - name: REGION
        value: ""
      # Percentage of the identified RDS instances to target
      # NOTE(review): behavior when left empty is not visible here — confirm
      - name: INSTANCE_AFFECTED_PERC
        value: ""
      - name: SEQUENCE
        value: "parallel"
      - name: DEFAULT_HEALTH_CHECK
        value: "false"
      # Provide the path of AWS credentials mounted from secret
      # (the cloud-secret below is mounted at /tmp/)
      - name: AWS_SHARED_CREDENTIALS_FILE
        value: "/tmp/cloud_config.yml"
    labels:
      name: rds-instance-stop
      app.kubernetes.io/part-of: litmus
      app.kubernetes.io/component: experiment-job
      app.kubernetes.io/version: ci
    secrets:
      - name: cloud-secret
        mountPath: /tmp/

Binary file not shown.

After

Width:  |  Height:  |  Size: 61 KiB

View File

@@ -0,0 +1,34 @@
---
# ChaosEngine that runs the rds-instance-stop experiment using the
# litmus-admin service account in the default namespace.
apiVersion: litmuschaos.io/v1alpha1
kind: ChaosEngine
metadata:
  name: aws-chaos
  namespace: default
spec:
  engineState: 'active'
  chaosServiceAccount: litmus-admin
  experiments:
    - name: rds-instance-stop
      spec:
        components:
          env:
            # set chaos duration (in sec) as desired
            - name: TOTAL_CHAOS_DURATION
              value: '30'
            # set interval duration (in sec) as desired
            - name: CHAOS_INTERVAL
              value: '30'
            # Instance identifier(s) of the target RDS instances
            # ex: rds-demo-instance-1,rds-demo-instance-2
            - name: RDS_INSTANCE_IDENTIFIER
              value: ''
            # provide the region name of the instance
            - name: REGION
              value: ''
            # Percentage of the identified RDS instances to target
            # (instances are selected by identifier, not by tag)
            - name: INSTANCE_AFFECTED_PERC
              value: ''

View File

@@ -0,0 +1,104 @@
---
# ChaosExperiment for the rds-instance-stop fault: stops an RDS instance
# identified by its instance identifier.
apiVersion: litmuschaos.io/v1alpha1
description:
  message: |
    Stopping an RDS instance identified by instance identifier.
kind: ChaosExperiment
metadata:
  name: rds-instance-stop
  labels:
    name: rds-instance-stop
    app.kubernetes.io/part-of: litmus
    app.kubernetes.io/component: chaosexperiment
    app.kubernetes.io/version: ci
spec:
  definition:
    scope: Cluster
    permissions:
      # Create and monitor the experiment & helper pods
      - apiGroups: [""]
        resources: ["pods"]
        verbs:
          [
            "create",
            "delete",
            "get",
            "list",
            "patch",
            "update",
            "deletecollection",
          ]
      # Perform CRUD operations on the events inside chaosengine and chaosresult
      - apiGroups: [""]
        resources: ["events"]
        verbs: ["create", "get", "list", "patch", "update"]
      # Fetch configmaps & secrets details and mount them to the experiment pod (if specified)
      - apiGroups: [""]
        resources: ["secrets", "configmaps"]
        verbs: ["get", "list"]
      # Track and get the runner, experiment, and helper pods log
      - apiGroups: [""]
        resources: ["pods/log"]
        verbs: ["get", "list", "watch"]
      # Create and manage command execution inside the target container
      - apiGroups: [""]
        resources: ["pods/exec"]
        verbs: ["get", "list", "create"]
      # Configure and monitor the experiment job from the chaos-runner pod
      - apiGroups: ["batch"]
        resources: ["jobs"]
        verbs: ["create", "list", "get", "delete", "deletecollection"]
      # Create, poll the status of, and delete litmus chaos resources used within a chaos workflow
      - apiGroups: ["litmuschaos.io"]
        resources: ["chaosengines", "chaosexperiments", "chaosresults"]
        verbs: ["create", "list", "get", "patch", "update", "delete"]
      # Allow the experiment to perform node status checks
      - apiGroups: [""]
        resources: ["nodes"]
        verbs: ["get", "list"]
    image: "litmuschaos.docker.scarf.sh/litmuschaos/go-runner:latest"
    imagePullPolicy: Always
    # Runs as: /bin/bash -c "./experiments -name rds-instance-stop"
    args:
      - -c
      - ./experiments -name rds-instance-stop
    command:
      - /bin/bash
    env:
      # Chaos duration in sec
      - name: TOTAL_CHAOS_DURATION
        value: "30"
      # Interval duration in sec
      - name: CHAOS_INTERVAL
        value: "30"
      # Period to wait before and after injection of chaos in sec
      - name: RAMP_TIME
        value: ""
      # Provide the RDS instance identifier
      # ex: rds-demo-instance-1,rds-demo-instance-2
      - name: RDS_INSTANCE_IDENTIFIER
        value: ""
      # Provide the region name of the instance
      - name: REGION
        value: ""
      # Percentage of the identified RDS instances to target
      # NOTE(review): behavior when left empty is not visible here — confirm
      - name: INSTANCE_AFFECTED_PERC
        value: ""
      - name: SEQUENCE
        value: "parallel"
      - name: DEFAULT_HEALTH_CHECK
        value: "false"
      # Provide the path of AWS credentials mounted from secret
      # (the cloud-secret below is mounted at /tmp/)
      - name: AWS_SHARED_CREDENTIALS_FILE
        value: "/tmp/cloud_config.yml"
    labels:
      name: rds-instance-stop
      app.kubernetes.io/part-of: litmus
      app.kubernetes.io/component: experiment-job
      app.kubernetes.io/version: ci
    secrets:
      - name: cloud-secret
        mountPath: /tmp/

View File

@@ -0,0 +1,33 @@
---
# ChartServiceVersion metadata for the rds-instance-stop fault
# (display name, description, maintainers and links).
apiVersion: litmuschaos.io/v1alpha1
kind: ChartServiceVersion
metadata:
  createdAt: 2024-09-06T10:28:08Z
  name: rds-instance-stop
  version: 0.1.0
  annotations:
    categories: AWS
    vendor: LitmusChaos
spec:
  displayName: RDS Instance Stop
  categoryDescription: |
    This fault causes a state change of an RDS instance to a stopped state for a specified chaos duration.
    - Causes state change of an RDS instance to stopped state using instance identifier before bringing it back to available state after the specified chaos duration.
    - It helps to check the performance of the application when the RDS instance is stopped.
  keywords:
    - AWS
  platforms:
    - AWS
  maintainers:
    - name: Jongwoo Han
      email: jongwooo.han@gmail.com
  chaosType: infra
  labels:
    app.kubernetes.io/component: chartserviceversion
    app.kubernetes.io/version: ci
  links:
    # NOTE(review): this looks like a generic docs contents page rather than a
    # fault-specific page — confirm and point at the rds-instance-stop docs if available
    - name: Documentation
      url: https://litmuschaos.github.io/litmus/experiments/categories/contents
  icon:
    # No icon URL is set; use an explicit empty string instead of a bare
    # (null) value so it matches the empty mediatype below.
    - url: ""
      mediatype: ""