// ChaosTestKafkaMQ.groovy (15 KB)

  1. pipeline {
  // Pipeline-wide options: prefix console output with timestamps and cap the whole run.
  options {
    timestamps()
    // Applies to the entire pipeline, not to a single stage. The per-stage timeouts
    // declared further down add up to more than this value, so this is the effective cap.
    timeout(unit: 'MINUTES', time: 30)
  }
  // Execute the pipeline in a Kubernetes pod described by the chaos-test pod YAML.
  agent {
    kubernetes {
      label 'milvus-test'
      // steps run in the 'main' container unless a stage selects another one
      defaultContainer 'main'
      yamlFile 'build/ci/jenkins/pod/chaos-test.yaml'
      customWorkspace '/home/jenkins/agent/workspace'
      // stay idle for 5 minutes so that clean-up tasks can finish
      idleMinutes 5
    }
  }
  // Build parameters. Stages read them through params.<name>, so every name is kept
  // verbatim, including the historical spelling 'idel_time'.
  parameters {
    // Kind of fault to inject.
    choice(
      name: 'chaos_type',
      description: 'Chaos Test Type',
      choices: ['pod-kill', 'pod-failure', 'mem-stress', 'network-latency', 'network-partition', 'io-latency']
    )
    // Component that receives the fault. The description is one string literal joined
    // with backslash line-continuations; the continuation lines start at column 0 so
    // that no indentation leaks into the text.
    choice(
      name: 'pod_name',
      description: 'Chaos Test Target: \
mem-stress: datanode, etcd, indexnode, minio, proxy, kafka, querynode, standalone \
io-fault & io-latency: minio, kafka, etcd ',
      choices: ["allstandalone", "allcluster", "standalone", "datacoord", "datanode", "indexcoord", "indexnode", "proxy", "kafka", "querycoord", "querynode", "rootcoord", "etcd", "minio"]
    )
    // Which pytest module the chaos stage runs.
    choice(
      name: 'chaos_task',
      description: 'Chaos Test Task',
      choices: ['chaos-test', 'data-consist-test']
    )
    string(name: 'image_repository', defaultValue: 'harbor.milvus.io/dockerhub/milvusdb/milvus', description: 'Image Repository')
    // 'master-latest' is resolved to a concrete tag in the deploy stage
    string(name: 'image_tag', defaultValue: 'master-latest', description: 'Image Tag')
    // minutes to sleep after the chaos stage (used as "sleep <value>m")
    string(name: 'idel_time', defaultValue: '1', description: 'Wait Time after chaos test')
    string(name: 'etcd_image_repository', defaultValue: "milvusdb/etcd", description: 'Etcd Image Repository')
    string(name: 'etcd_image_tag', defaultValue: "3.5.0-r6", description: 'Etcd Image Tag')
    // replica counts written into cluster-values.yaml
    string(name: 'querynode_nums', defaultValue: '3', description: 'QueryNode Nums')
    string(name: 'datanode_nums', defaultValue: '2', description: 'DataNode Nums')
    string(name: 'indexnode_nums', defaultValue: '1', description: 'IndexNode Nums')
    string(name: 'proxy_nums', defaultValue: '1', description: 'Proxy Nums')
    // booleanParam takes a boolean default; the previous value was the string 'false'.
    // The post section compares "${params.keep_env}" with "false", which still matches.
    booleanParam(name: 'keep_env', defaultValue: false, description: 'Keep Env')
  }
  // Environment variables shared by all stages.
  environment {
    // namespace passed to the "kubectl wait" calls and to the log export script
    NAMESPACE = 'chaos-testing'
    // release name, unique per build: <target>-<chaos type>-<build id>
    RELEASE_NAME = "${params.pod_name}-${params.chaos_type}-${env.BUILD_ID}"
    // not referenced anywhere else in this file
    ARTIFACTS = "${env.WORKSPACE}/_artifacts"
  }
  90. stages {
  // Install the Python test dependencies listed in tests/python_client/requirements.txt.
  stage ('Install Dependency') {
    steps {
      container('main') {
        dir ('tests/python_client') {
          script {
            // --trusted-host expects a bare hostname (or host:port), not a URL;
            // the previous value "https://test.pypi.org" did not name test.pypi.org.
            sh "pip install -r requirements.txt --trusted-host test.pypi.org"
          }
        }
      }
    }
  }
  // Patch the Helm value files: enable Kafka, set the etcd image, and apply replica counts.
  stage ('Modify Milvus chart values') {
    steps {
      container('main') {
        dir ('tests/python_client/chaos') {
          // A single shell invocation. Edits are grouped per file; every expression
          // assigns a different key, so their relative order does not matter.
          sh """
          yq -i '.kafka.enabled = true' standalone-values.yaml
          yq -i '.etcd.image.repository = "${params.etcd_image_repository}"' standalone-values.yaml
          yq -i '.etcd.image.tag = "${params.etcd_image_tag}"' standalone-values.yaml
          yq -i '.kafka.enabled = true' cluster-values.yaml
          yq -i '.etcd.image.repository = "${params.etcd_image_repository}"' cluster-values.yaml
          yq -i '.etcd.image.tag = "${params.etcd_image_tag}"' cluster-values.yaml
          yq -i '.queryNode.replicas = "${params.querynode_nums}"' cluster-values.yaml
          yq -i '.dataNode.replicas = "${params.datanode_nums}"' cluster-values.yaml
          yq -i '.indexNode.replicas = "${params.indexnode_nums}"' cluster-values.yaml
          yq -i '.proxy.replicas = "${params.proxy_nums}"' cluster-values.yaml
          """
        }
      }
    }
  }
  // Resolve the image tag, install the release with the matching install script,
  // then wait until the release's pods report Ready.
  stage ('Deploy Milvus') {
    options {
      timeout(unit: 'MINUTES', time: 15) // limit for this stage only
    }
    steps {
      container('main') {
        dir ('tests/python_client/chaos/scripts') {
          script {
            // 'master-latest' is resolved by the repository's tag lookup script;
            // any other value is used as given.
            def resolvedTag = (params.image_tag == 'master-latest') ?
                sh(script: 'bash ../../../../scripts/docker_image_find_tag.sh -n milvusdb/milvus -t master-latest -f master- -F -L -q', returnStdout: true).trim() :
                "${params.image_tag}"
            sh "echo ${resolvedTag}"
            sh "echo ${params.chaos_type}"
            sh "helm repo add milvus https://zilliztech.github.io/milvus-helm"
            sh "helm repo update"

            // Targets whose name contains "standalone" (including "allstandalone")
            // use the standalone installer; everything else uses the cluster installer.
            def installer = "${params.pod_name}".contains("standalone") ?
                'install_milvus_standalone.sh' :
                'install_milvus_cluster.sh'
            sh """
            IMAGE_TAG="${resolvedTag}" \
            REPOSITORY="${params.image_repository}" \
            RELEASE_NAME="${env.RELEASE_NAME}" \
            bash ${installer}
            """

            // Pods of the release are matched under two different label keys.
            def selectors = [
                "app.kubernetes.io/instance=${env.RELEASE_NAME}",
                "release=${env.RELEASE_NAME}"
            ]
            for (selector in selectors) {
              sh "kubectl wait --for=condition=Ready pod -l ${selector} -n ${env.NAMESPACE} --timeout=360s"
            }
            sh "kubectl get pods -o wide|grep ${env.RELEASE_NAME}"
          }
        }
      }
    }
  }
  // Baseline check: run the e2e test case against the fresh deployment.
  stage ('Run e2e test before chaos') {
    options {
      timeout(unit: 'MINUTES', time: 5) // limit for this stage only
    }
    steps {
      container('main') {
        dir ('tests/python_client/chaos') {
          script {
            // cluster IP of the service named "<release>-milvus"
            def milvusHost = sh(
                script: "kubectl get svc/${env.RELEASE_NAME}-milvus -o jsonpath=\"{.spec.clusterIP}\"",
                returnStdout: true
            ).trim()
            sh "pytest -s -v ../testcases/test_e2e.py --host ${milvusHost} --log-cli-level=INFO --capture=no"
          }
        }
      }
    }
  }
  // Baseline check: run scripts/hello_milvus.py against the fresh deployment.
  stage ('Run hello_milvus before chaos') {
    options {
      timeout(unit: 'MINUTES', time: 5) // limit for this stage only
    }
    steps {
      container('main') {
        dir ('tests/python_client/chaos') {
          script {
            // cluster IP of the service named "<release>-milvus"
            def milvusHost = sh(
                script: "kubectl get svc/${env.RELEASE_NAME}-milvus -o jsonpath=\"{.spec.clusterIP}\"",
                returnStdout: true
            ).trim()
            sh "python3 scripts/hello_milvus.py --host ${milvusHost}"
          }
        }
      }
    }
  }
  // Prepare the chaos configuration, run the selected chaos test module, then wait
  // for the release's pods to become Ready again.
  stage ('Run chaos test'){
    options {
      timeout(unit: 'MINUTES', time: 15) // limit for this stage only
    }
    steps {
      container('main') {
        dir ('tests/python_client/chaos') {
          script {
            sh """
            POD_NAME="${params.pod_name}" \
            CHAOS_TYPE="${params.chaos_type}" \
            RELEASE_NAME="${env.RELEASE_NAME}" \
            bash scripts/modify_config.sh
            """

            // chaos_task value -> pytest module; an unknown value runs no test
            def testModules = [
                'chaos-test'       : 'test_chaos.py',
                'data-consist-test': 'test_chaos_data_consist.py'
            ]
            def testModule = testModules["${params.chaos_task}".toString()]
            if (testModule) {
              def milvusHost = sh(
                  script: "kubectl get svc/${env.RELEASE_NAME}-milvus -o jsonpath=\"{.spec.clusterIP}\"",
                  returnStdout: true
              ).trim()
              // "|| echo" keeps the step green when pytest fails or hits the 14m limit,
              // so the stages that follow still run
              sh "timeout 14m pytest -s -v ${testModule} --host ${milvusHost} --log-cli-level=INFO --capture=no || echo 'chaos test fail' "
            }
            echo "chaos test done"

            // Pods of the release are matched under two different label keys.
            def selectors = [
                "app.kubernetes.io/instance=${env.RELEASE_NAME}",
                "release=${env.RELEASE_NAME}"
            ]
            for (selector in selectors) {
              sh "kubectl wait --for=condition=Ready pod -l ${selector} -n ${env.NAMESPACE} --timeout=360s"
            }
            sh "kubectl get pods -o wide|grep ${env.RELEASE_NAME}"
          }
        }
      }
    }
  }
  // Print the report log named after the release, if one exists.
  stage ('result analysis') {
    steps {
      container('main') {
        dir ('tests/python_client/chaos/reports') {
          script {
            echo "result analysis"
            def reportLog = "${env.RELEASE_NAME}.log"
            // a missing log file must not fail the stage
            sh "cat ${reportLog} || echo 'no log file'"
          }
        }
      }
    }
  }
  // Pause for the number of minutes given by the 'idel_time' parameter.
  stage ('Milvus Idle Time') {
    steps {
      container('main') {
        dir ('tests/python_client/chaos') {
          script {
            // the same text is logged first and then executed as the shell command
            def sleepCommand = "sleep ${params.idel_time}m"
            echo sleepCommand
            sh sleepCommand
          }
        }
      }
    }
  }
  // Re-run the e2e test case after the chaos test, then list the release's pods.
  stage ('run e2e test after chaos') {
    options {
      timeout(unit: 'MINUTES', time: 5) // limit for this stage only
    }
    steps {
      container('main') {
        dir ('tests/python_client/chaos') {
          script {
            // cluster IP of the service named "<release>-milvus"
            def milvusHost = sh(
                script: "kubectl get svc/${env.RELEASE_NAME}-milvus -o jsonpath=\"{.spec.clusterIP}\"",
                returnStdout: true
            ).trim()
            sh "pytest -s -v ../testcases/test_e2e.py --host ${milvusHost} --log-cli-level=INFO --capture=no"
            sh "kubectl get pods -o wide|grep ${env.RELEASE_NAME}"
          }
        }
      }
    }
  }
  // Re-run scripts/hello_milvus.py after the chaos test, then list the release's pods.
  stage ('Run hello_milvus after chaos') {
    options {
      timeout(unit: 'MINUTES', time: 5) // limit for this stage only
    }
    steps {
      container('main') {
        dir ('tests/python_client/chaos') {
          script {
            // cluster IP of the service named "<release>-milvus"
            def milvusHost = sh(
                script: "kubectl get svc/${env.RELEASE_NAME}-milvus -o jsonpath=\"{.spec.clusterIP}\"",
                returnStdout: true
            ).trim()
            sh "python3 scripts/hello_milvus.py --host ${milvusHost}"
            sh "kubectl get pods -o wide|grep ${env.RELEASE_NAME}"
          }
        }
      }
    }
  }
  // Run scripts/verify_all_collections.py against the deployment, then list the release's pods.
  stage ('Verify all collections after chaos') {
    options {
      timeout(unit: 'MINUTES', time: 10) // limit for this stage only
    }
    steps {
      container('main') {
        dir ('tests/python_client/chaos') {
          script {
            // cluster IP of the service named "<release>-milvus"
            def milvusHost = sh(
                script: "kubectl get svc/${env.RELEASE_NAME}-milvus -o jsonpath=\"{.spec.clusterIP}\"",
                returnStdout: true
            ).trim()
            sh "python3 scripts/verify_all_collections.py --host ${milvusHost}"
            sh "kubectl get pods -o wide|grep ${env.RELEASE_NAME}"
          }
        }
      }
    }
  }
  300. }
  // Post-build actions: collect and archive logs on every run, then clean up the release.
  post {
    always {
      echo 'upload logs'
      container('main') {
        dir ('tests/python_client/chaos') {
          script {
            def release = env.RELEASE_NAME
            def pytestArchive = "artifacts-${release}-pytest-logs.tar.gz"
            def serverArchive = "artifacts-${release}-server-logs.tar.gz"

            echo "get pod status"
            sh "kubectl get pods -o wide|grep ${release} || true"

            // Log collection is best effort: each command ends in "|| true" and
            // archiving tolerates a missing file.
            echo "collecte logs"
            sh "bash ../../scripts/export_log_k8s.sh ${env.NAMESPACE} ${release} k8s_log/${release} || true"
            sh "tar -zcvf ${pytestArchive} /tmp/ci_logs/ --remove-files || true"
            sh "tar -zcvf ${serverArchive} k8s_log/ --remove-files || true"
            archiveArtifacts artifacts: pytestArchive, allowEmptyArchive: true
            archiveArtifacts artifacts: serverArchive, allowEmptyArchive: true

            // Uninstall here unless the caller asked to keep the environment.
            // NOTE(review): the success branch below uninstalls unconditionally, so
            // keep_env only preserves the environment when the build does not
            // succeed - confirm that this is intended.
            if ("${params.keep_env}" == "false") {
              sh "bash scripts/uninstall_milvus.sh ${release}"
            }
          }
        }
      }
    }
    success {
      echo 'I succeeeded!'
      container('main') {
        dir ('tests/python_client/chaos/scripts') {
          script {
            // "|| true": the release may already have been removed by the always branch
            sh "bash uninstall_milvus.sh ${env.RELEASE_NAME} || true"
          }
        }
      }
    }
    unstable {
      echo 'I am unstable :/'
    }
    failure {
      echo 'I failed :('
    }
    changed {
      echo 'Things were different before...'
    }
  }
  342. }