From 540409513ee2a0c430d4d92c2b4fcd60924775c2 Mon Sep 17 00:00:00 2001 From: Jackcuii Date: Thu, 27 Nov 2025 20:39:08 -0600 Subject: [PATCH 01/17] copy template --- benchmarks/sregym/Dockerfile | 14 ++ benchmarks/sregym/README.md | 61 +++++++ .../example_bench_benchmark_timestamp.jsonl | 3 + .../example_bench_pretrain_timestamp.jsonl | 1 + .../sft/example_bench_sft_timestamp.jsonl | 2 + benchmarks/sregym/env.toml | 12 ++ benchmarks/sregym/install.sh | 19 +++ benchmarks/sregym/requirements.txt | 5 + benchmarks/sregym/run.sh | 29 +++++ benchmarks/sregym/src/__init__.py | 1 + benchmarks/sregym/src/main.py | 120 ++++++++++++++++++ benchmarks/sregym/test.sh | 10 ++ benchmarks/sregym/tests/__init__.py | 0 benchmarks/sregym/tests/test_benchmark.py | 60 +++++++++ 14 files changed, 337 insertions(+) create mode 100644 benchmarks/sregym/Dockerfile create mode 100644 benchmarks/sregym/README.md create mode 100644 benchmarks/sregym/data/benchmark/example_bench_benchmark_timestamp.jsonl create mode 100644 benchmarks/sregym/data/pretrain/example_bench_pretrain_timestamp.jsonl create mode 100644 benchmarks/sregym/data/sft/example_bench_sft_timestamp.jsonl create mode 100644 benchmarks/sregym/env.toml create mode 100755 benchmarks/sregym/install.sh create mode 100644 benchmarks/sregym/requirements.txt create mode 100755 benchmarks/sregym/run.sh create mode 100644 benchmarks/sregym/src/__init__.py create mode 100644 benchmarks/sregym/src/main.py create mode 100755 benchmarks/sregym/test.sh create mode 100644 benchmarks/sregym/tests/__init__.py create mode 100644 benchmarks/sregym/tests/test_benchmark.py diff --git a/benchmarks/sregym/Dockerfile b/benchmarks/sregym/Dockerfile new file mode 100644 index 0000000..3e1536e --- /dev/null +++ b/benchmarks/sregym/Dockerfile @@ -0,0 +1,14 @@ +FROM ubuntu:24.04 + +WORKDIR /usr/src +COPY . . +RUN apt-get update && apt-get install -y \ + build-essential \ + git \ + wget \ + python3-pip \ + python3-venv + +RUN chmod +x install.sh test.sh && ./install.sh + +ENTRYPOINT ["./test.sh"] diff --git a/benchmarks/sregym/README.md b/benchmarks/sregym/README.md new file mode 100644 index 0000000..daa3db8 --- /dev/null +++ b/benchmarks/sregym/README.md @@ -0,0 +1,61 @@ +# YourBenchmarkName + +## Scenario Description + +Provide a summary of your scenarios here. This section should give an overview of the context, objectives, and key elements involved in your scenarios. + +### Task Details + +Describe your task in detail, including: + +- **Input**: Specify the type of input data required for the task. +- **Output**: Define the expected output from the task. +- **Evaluation**: Explain how to evaluate the output, including any metrics or criteria used to measure performance. + +## Benchmark Setup + +### Test in Docker + +To test your benchmark in a Docker container, follow these steps: + +1. Build the Docker image using the provided Dockerfile. You can do this by running the following command in the terminal: + + ```sh + docker build -t your_benchmark_image . + ``` + +2. Once the image is built, you can run it using the following command: + + ```sh + docker run -it --rm your_benchmark_image + # docker run --rm your_benchmark_image + ``` + +3. Inside the container, navigate to the appropriate directory and execute the benchmark script to start the testing process. + + ```sh + ./run.sh + ``` + +### Manually Test + +To manually test your benchmark, follow these steps: + +#### Install Dependencies + +To install and configure your benchmark, follow these steps: + +1. 
Run the `install.sh` script to set up the environment and install necessary dependencies. You can simply execute the following command: + + ```sh + ./install.sh + ``` + +#### Run + +To run your benchmark and obtain results for a specific task and model, follow these steps: + +1. Review the `run.sh` script to understand the expected commands and parameters. +2. Execute the `run.sh` script to start the benchmark. The script will guide you through the process and generate the results. + +Feel free to adjust the details to better fit your specific scenario and requirements. Let me know if there's anything else you need! diff --git a/benchmarks/sregym/data/benchmark/example_bench_benchmark_timestamp.jsonl b/benchmarks/sregym/data/benchmark/example_bench_benchmark_timestamp.jsonl new file mode 100644 index 0000000..c4b375f --- /dev/null +++ b/benchmarks/sregym/data/benchmark/example_bench_benchmark_timestamp.jsonl @@ -0,0 +1,3 @@ +{"id": "platform_1", "sys_prompt": "You are an expert on Kusto. Help me write kusto query based on the user requirement. DO NOT explain anything, DO give KQL directly. You should strictly use the following format:\n##### KQL\n```\ncluster(#CLUSTER_NAME#).database(#DATABASE_NAME#).#TABLE_NAME# | where #COLUMN_NAME# xxx\n```", "user_prompt": "Query the nodes are stuck in Linux MOS under cluster IAD02PrdGPC01", "thinking": "chain of thought", "response": "let targetCluster = \"IAD02PrdGPC01\";\ncluster(\"azuredcm\").database(\"AzureDCMDb\").dcmInventoryMachines \n| where AzureClusterName == targetCluster\n| join kind=inner \n( \n cluster('Oneboot').database('OneDhcp').OneDhcpApi\n | where PreciseTimeStamp > ago(7d)\n | where ApiName has 'SetDeviceOsType' \n | parse ApiParams with \"name=\" MachineName \",osType=\" OSType \",osPath=\" OSPath\n) on MachineName\n| project PreciseTimeStamp, AzureClusterName, AzureNodeId, MachineName, OSType, OSPath\n| summarize arg_max(PreciseTimeStamp, *) by AzureNodeId, MachineName\n| where OSPath == \"LinuxMOS\" ", "metadata": {"scenario": "Example", "subtask": "Example", "data_quality":"high", "XXX": "XXX"}} +{"id": "platform_2", "sys_prompt": "You are an expert on Kusto. Help me write kusto query based on the user requirement. DO NOT explain anything, DO give KQL directly. You should strictly use the following format:\n##### KQL\n```\ncluster(#CLUSTER_NAME#).database(#DATABASE_NAME#).#TABLE_NAME# | where #COLUMN_NAME# xxx\n```", "user_prompt": "Check the capacity of cluster SIN22PrdApp22, which is a non-GPU cluster", "thinking": "chain of thought", "response": "cluster(\"azcore.centralus\").database(\"AzureCP\").MycroftNodeHealthSnapshot\n| where PreciseTimeStamp >= ago(1d)\n| where ClusterName == \"SIN22PrdApp22\"\n| summarize arg_max(PreciseTimeStamp, *) by ClusterName, NodeId\n| project PreciseTimeStamp, ClusterName, NodeId, NsdState, AvailabilityState, ContainerCount\n| summarize NodeCount = count() by NsdState, AvailabilityState, ContainerCount", "metadata": {"scenario": "Example", "subtask": "Example", "data_quality":"high", "XXX": "XXX"}} +{"id": "platform_3", "sys_prompt": "You are an expert on Kusto. Help me write kusto query based on the user requirement. DO NOT explain anything, DO give KQL directly. 
You should strictly use the following format:\n##### KQL\n```\ncluster(#CLUSTER_NAME#).database(#DATABASE_NAME#).#TABLE_NAME# | where #COLUMN_NAME# xxx\n```", "user_prompt": "Check the capacity of cluster IAD02PrdGPC01, which is a GPU cluster", "thinking": "chain of thought", "response": "let cluster = \"IAD02PrdGPC01\";\ncluster(\"azcore.centralus\").database(\"AzureCP\").MycroftNodeSnapshot\n| where PreciseTimeStamp >= ago(1d)\n| where ClusterName == cluster\n| summarize arg_max(PreciseTimeStamp, *) by ClusterName, NodeId\n| where MachinePoolName contains \"MP2\"\n| project PreciseTimeStamp, ClusterName, NodeId, MachinePoolName\n| join (\ncluster(\"azcore.centralus\").database(\"AzureCP\").MycroftNodeHealthSnapshot\n| where PreciseTimeStamp >= ago(1d)\n| where ClusterName == cluster\n| summarize arg_max(PreciseTimeStamp, *) by ClusterName, NodeId\n| project PreciseTimeStamp, ClusterName, NodeId, NsdState, AvailabilityState, ContainerCount\n) on NodeId\n| summarize NodeCount = count() by NsdState, AvailabilityState, ContainerCount", "metadata": {"scenario": "Example", "subtask": "Example", "data_quality":"high", "XXX": "XXX"}} diff --git a/benchmarks/sregym/data/pretrain/example_bench_pretrain_timestamp.jsonl b/benchmarks/sregym/data/pretrain/example_bench_pretrain_timestamp.jsonl new file mode 100644 index 0000000..c44eafe --- /dev/null +++ b/benchmarks/sregym/data/pretrain/example_bench_pretrain_timestamp.jsonl @@ -0,0 +1 @@ +{"text": "text of one doc", "metadata": {"scenarios": "XXX", "subtask": "XXXX", "description": "xx", "link": "XXX", "XXX": "XXX"}} \ No newline at end of file diff --git a/benchmarks/sregym/data/sft/example_bench_sft_timestamp.jsonl b/benchmarks/sregym/data/sft/example_bench_sft_timestamp.jsonl new file mode 100644 index 0000000..26cadb9 --- /dev/null +++ b/benchmarks/sregym/data/sft/example_bench_sft_timestamp.jsonl @@ -0,0 +1,2 @@ +{"sys_prompt": "You are XXX", "user_prompt": "what", "thinking": "chain of thought", "response": "XXX", "metadata": {"scenario": "XX", "subtask": "XXX", "data_quality":"high", "XXX": "XXX"}} + \ No newline at end of file diff --git a/benchmarks/sregym/env.toml b/benchmarks/sregym/env.toml new file mode 100644 index 0000000..eac33ed --- /dev/null +++ b/benchmarks/sregym/env.toml @@ -0,0 +1,12 @@ +[llm] +AZURE_API_KEY = "XXX" +AZURE_API_BASE = "XXXX" +AZURE_API_VERSION = "XXX" +ANTHROPIC_API_KEY = "sk-XXXX" + +[hardware] +use_gpu = false + +[env-docker] +image = "default" +entrypoint = "./run.sh" \ No newline at end of file diff --git a/benchmarks/sregym/install.sh b/benchmarks/sregym/install.sh new file mode 100755 index 0000000..8a2c40c --- /dev/null +++ b/benchmarks/sregym/install.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +set -e # Exit immediately on error. + +# if .venv does not exist, create it +if [ -d ".venv" ]; then + echo "==> .venv already exists, skipping creation." +else + echo "==> Creating .venv directory..." + + python3 -m venv .venv + source .venv/bin/activate + pip install -r requirements.txt + pip install pytest + pip install pytest-cov + deactivate +fi + +echo "==> ExampleBench environment is set up successfully." 
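Taken together, `install.sh` above and `run.sh` (in the next hunk) cover the manual, non-Docker path described in the README. A minimal sketch of that flow, assuming it is run from `benchmarks/sregym/` and reusing the `Qwen/Qwen2.5-7B-Instruct` identifier that `run.sh` itself gives as its usage example:

```sh
# Sketch only: manual run without Docker, from benchmarks/sregym/
./install.sh                        # creates .venv and installs requirements.txt, pytest, pytest-cov
./run.sh Qwen/Qwen2.5-7B-Instruct   # activates .venv and calls: python src/main.py --model_name <model>
./test.sh                           # runs the pytest checks over the bundled JSONL data
```

Results are written under `./outputs/` by `src/main.py` unless `--save_path` is given.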
diff --git a/benchmarks/sregym/requirements.txt b/benchmarks/sregym/requirements.txt new file mode 100644 index 0000000..f5e49c2 --- /dev/null +++ b/benchmarks/sregym/requirements.txt @@ -0,0 +1,5 @@ +sentence-transformers==4.0.1 +scikit-learn==1.6.1 +requests +azure-identity +litellm==1.77.5 \ No newline at end of file diff --git a/benchmarks/sregym/run.sh b/benchmarks/sregym/run.sh new file mode 100755 index 0000000..45f0f10 --- /dev/null +++ b/benchmarks/sregym/run.sh @@ -0,0 +1,29 @@ +#!/bin/bash + +set -e # Exit immediately on error. + +if [ $# -ne 1 ]; then + echo "Usage: $0 " + echo "Example: $0 Qwen/Qwen2.5-7B-Instruct" + exit 1 +fi + +MODEL_NAME="$1" +NEW_MODEL_NAME="${MODEL_NAME//\//_}" + +# Note: set it to "openai" if you are using your own model server (vllm) +# Otherwise, set it to "azure" if you are using azure gpt endpoint +# Run self-serving model +# export OPENAI_API_TYPE="openai" +# export OPENAI_BASE_URL="http://localhost:2327/v1" +# export OPENAI_API_KEY="EMPTY" + +source .venv/bin/activate +echo "==> Start to run ExampleBench" +# Note that if you benchmark has multiple tasks, you need to add --task +# in your code to enable task selection. +python src/main.py \ + --model_name "${MODEL_NAME}" + # --save_path "./outputs/examplebench__${NEW_MODEL_NAME}__$(date +"%Y-%m-%d_%H-%M-%S")" \ + +deactivate diff --git a/benchmarks/sregym/src/__init__.py b/benchmarks/sregym/src/__init__.py new file mode 100644 index 0000000..284e62c --- /dev/null +++ b/benchmarks/sregym/src/__init__.py @@ -0,0 +1 @@ +"""Init file for the example_bench package.""" diff --git a/benchmarks/sregym/src/main.py b/benchmarks/sregym/src/main.py new file mode 100644 index 0000000..b078c17 --- /dev/null +++ b/benchmarks/sregym/src/main.py @@ -0,0 +1,120 @@ +"""Example for benchmarking the performance of a model on a specific task.""" + +import argparse +import json +import os +import sys +from datetime import datetime + +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../'))) + +from sdk.utils import set_llm_endpoint_from_config + +set_llm_endpoint_from_config('env.toml') + +from sdk.evaluator import BasicEvaluator # noqa: E402 +from sdk.executor import SimpleExecutor # noqa: E402 + + +def main(_input_file, output_dir, _model_name, agent_name): + """Main function for running the benchmark.""" + total_score = [] + with ( + open(_input_file, encoding='utf-8') as data, + open(os.path.join(output_dir, 'result.jsonl'), 'w', encoding='utf-8') as output_file, + ): + for line in data: + item = json.loads(line) + print('============ ' + item['id'] + ' ============') + if agent_name == 'llm': + executor = SimpleExecutor(_model_name, item['sys_prompt']) + else: + # You can add more agents here + raise ValueError(f'Unknown agent name: {agent_name}') + response = executor.run(item['user_prompt']) + + evaluator = BasicEvaluator(_model_name) + offline_metrics = evaluator.eval(question=item['user_prompt'], answer=response, groundtruth=item) + + total_score.append( + ( + offline_metrics['syntax_acc'], + offline_metrics['exact_match'], + offline_metrics['jaccard_similarity'], + offline_metrics['cosine_similarity'], + offline_metrics['embeddings_similarity'], + offline_metrics['llmjudger_rating'], + ) + ) # drop llmjudger_answer + + result = { + 'id': item['id'], + 'sys_prompt': item['sys_prompt'], + 'user_prompt': item['user_prompt'], + 'groundtruth': item['response'], + 'response': response, + 'syntax_acc': offline_metrics['syntax_acc'], + 'exact_match': offline_metrics['exact_match'], + 
'jaccard_similarity': offline_metrics['jaccard_similarity'], + 'cosine_similarity': offline_metrics['cosine_similarity'], + 'embeddings_similarity': offline_metrics['embeddings_similarity'], + 'llmjudger_rating': offline_metrics['llmjudger_rating'], + 'llmjudger_answer': offline_metrics['llmjudger_answer'], + } + print('Evaluation Result:') + print(result) + output_file.write(json.dumps(result)) + output_file.write('\n') + + avg_score = [sum(values) / len(values) for values in list(zip(*total_score))] + avg_score_dict = { + 'syntax_acc': avg_score[0], + 'exact_match': avg_score[1], + 'jaccard_similarity': avg_score[2], + 'cosine_similarity': avg_score[3], + 'embeddings_similarity': avg_score[4], + 'llmjudger_rating': avg_score[5], + 'final_score': sum(avg_score[:5]) / 5, # Average of the first five metrics + } + with open(os.path.join(output_dir, 'avg_score.json'), 'w', encoding='utf-8') as avg_score_file: + json.dump(avg_score_dict, avg_score_file, indent=4) + print('************ Final average score ************') + print(avg_score_dict) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='example benchmark') + parser.add_argument( + '-i', + '--input_file', + help='Benchmark input file', + default='./data/benchmark/example_bench_benchmark_timestamp.jsonl', + ) + parser.add_argument('-o', '--save_path', help='Result save path', default=None) + # Add a parameter for agent + parser.add_argument('-a', '--agent', help='Agent Name', default='llm') + + parser.add_argument( + '-m', + '--model_name', + help='Model Name', + ) + # Note that if your benchmark has multiple tasks, you need to add --task + # in your code to enable task selection. + parser.add_argument('-t', '--task', help='specify task in scenarios', default=None) + + args = parser.parse_args() + + model_name = args.model_name + input_file = args.input_file + save_path = args.save_path + + if save_path is None: + str_model_name = model_name.replace('/', '_') + timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S') + save_path = os.path.join('./outputs', f'examplebench__{str_model_name}__{args.agent}__{timestamp}') + + save_path = os.path.abspath(os.path.expanduser(save_path)) + os.makedirs(save_path, exist_ok=True) + + main(input_file, save_path, model_name, agent_name=args.agent) diff --git a/benchmarks/sregym/test.sh b/benchmarks/sregym/test.sh new file mode 100755 index 0000000..00820da --- /dev/null +++ b/benchmarks/sregym/test.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +set -e # Exit immediately on error. + +source .venv/bin/activate +pytest --version +pytest +deactivate + +echo "==> ExampleBench test is done successfully."
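For reference, `src/main.py` above writes two files into its save path, which defaults to `./outputs/examplebench__<model>__<agent>__<timestamp>/`: `result.jsonl` (one scored record per benchmark item) and `avg_score.json` (per-metric averages plus `final_score`). A hypothetical inspection sketch, assuming a finished run with the `Qwen/Qwen2.5-7B-Instruct` example model and the default `llm` agent:

```sh
# Sketch only: the directory name follows the f-string in src/main.py
# ('/' in the model name becomes '_', agent defaults to 'llm', timestamp is the run time)
ls ./outputs/examplebench__Qwen_Qwen2.5-7B-Instruct__llm__*/
cat ./outputs/examplebench__Qwen_Qwen2.5-7B-Instruct__llm__*/avg_score.json
```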
diff --git a/benchmarks/sregym/tests/__init__.py b/benchmarks/sregym/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/benchmarks/sregym/tests/test_benchmark.py b/benchmarks/sregym/tests/test_benchmark.py new file mode 100644 index 0000000..550d61c --- /dev/null +++ b/benchmarks/sregym/tests/test_benchmark.py @@ -0,0 +1,60 @@ +"""Tests for the example benchmark.""" + +import json +import unittest +from pathlib import Path + + +class TestExampleBenchmark(unittest.TestCase): + def test_data_format(self): + """Test that benchmark data is in the correct format.""" + data_path = ( + Path(__file__).parent.parent + / "data" + / "benchmark" + / "example_bench_benchmark_timestamp.jsonl" + ) + + self.assertTrue( + data_path.exists(), f"Benchmark data file not found: {data_path}" + ) + + with open(data_path, encoding="utf-8") as f: + for line_num, line in enumerate(f, 1): + data = json.loads(line) + + # Check required fields + self.assertIn("id", data, f'Line {line_num}: missing "id" field') + self.assertIn( + "sys_prompt", data, f'Line {line_num}: missing "sys_prompt" field' + ) + self.assertIn( + "user_prompt", data, f'Line {line_num}: missing "user_prompt" field' + ) + self.assertIn( + "response", data, f'Line {line_num}: missing "response" field' + ) + + # Check field types + self.assertIsInstance( + data["id"], str, f'Line {line_num}: "id" must be a string' + ) + self.assertIsInstance( + data["sys_prompt"], + str, + f'Line {line_num}: "sys_prompt" must be a string', + ) + self.assertIsInstance( + data["user_prompt"], + str, + f'Line {line_num}: "user_prompt" must be a string', + ) + self.assertIsInstance( + data["response"], + str, + f'Line {line_num}: "response" must be a string', + ) + + +if __name__ == "__main__": + unittest.main() From 6d5eab07e630fb12c8f06ee5eda787c461430f69 Mon Sep 17 00:00:00 2001 From: Jackcuii Date: Thu, 27 Nov 2025 21:11:22 -0600 Subject: [PATCH 02/17] Squashed 'benchmarks/sregym/sregym_core/' content from commit 529f564e git-subtree-dir: benchmarks/sregym/sregym_core git-subtree-split: 529f564ef3c955db2ed82aac2f84c6eb05e078c7 --- .env.example | 64 + .github/.CODEOWNERS | 1 + .github/workflows/action.yml | 85 + .gitignore | 768 +++ .gitmodules | 3 + .pre-commit-config.yaml | 11 + .python-version | 1 + CONTRIBUTING.md | 326 ++ LICENSE.txt | 19 + Problem List.md | 89 + README.md | 93 + SREGym-applications | 1 + agents.yaml | 11 + assets/SREGymFigure.png | Bin 0 -> 330255 bytes cli.py | 214 + clients/__init__.py | 0 clients/autosubmit/autosubmit_agent.py | 31 + clients/stratus/README.md | 8 + clients/stratus/__init__.py | 0 .../configs/diagnosis_agent_config.yaml | 79 + .../configs/diagnosis_agent_prompts.yaml | 70 + .../stratus/configs/langgraph_tool_configs.py | 46 + .../configs/llm_summarization_prompt.yaml | 37 + .../configs/localization_agent_config.yaml | 89 + .../configs/localization_agent_prompts.yaml | 73 + .../configs/mitigation_agent_config.yaml | 74 + .../configs/mitigation_agent_prompts.yaml | 96 + .../configs/rollback_agent_config.yaml | 24 + .../configs/rollback_agent_prompts.yaml | 28 + clients/stratus/configs/stratus_config.py | 28 + clients/stratus/llm_backend/__init__.py | 0 .../stratus/llm_backend/get_llm_backend.py | 353 ++ clients/stratus/llm_backend/init_backend.py | 177 + clients/stratus/llm_backend/trim_util.py | 40 + clients/stratus/stratus_agent/README.md | 7 + clients/stratus/stratus_agent/__init__.py | 0 clients/stratus/stratus_agent/base_agent.py | 244 + .../stratus/stratus_agent/diagnosis_agent.py | 198 + 
.../stratus/stratus_agent/driver/__init__.py | 0 .../stratus/stratus_agent/driver/driver.py | 598 +++ .../stratus_agent/localization_agent.py | 79 + .../stratus/stratus_agent/mitigation_agent.py | 233 + .../stratus/stratus_agent/rollback_agent.py | 125 + clients/stratus/stratus_agent/state.py | 21 + clients/stratus/stratus_utils/__init__.py | 0 .../stratus_utils/ai_msg_mock_utils.py | 53 + clients/stratus/stratus_utils/get_logger.py | 7 + .../stratus_utils/get_starting_prompt.py | 16 + clients/stratus/stratus_utils/str_to_tool.py | 72 + .../stratus_utils/truncate_by_token.py | 23 + clients/stratus/tools/__init__.py | 0 clients/stratus/tools/basic_tool_node.py | 43 + clients/stratus/tools/compile/compile_tool.py | 59 + clients/stratus/tools/jaeger_tools.py | 248 + clients/stratus/tools/kubectl_tools.py | 234 + clients/stratus/tools/localization.py | 59 + clients/stratus/tools/prometheus_tools.py | 127 + .../stratus/tools/stateful_async_tool_node.py | 36 + clients/stratus/tools/stratus_tool_node.py | 127 + clients/stratus/tools/submit_tool.py | 127 + .../stratus/tools/text_editing/__init__.py | 0 clients/stratus/tools/text_editing/edit.py | 208 + .../stratus/tools/text_editing/file_manip.py | 415 ++ .../tools/text_editing/flake8_utils.py | 142 + clients/stratus/tools/text_editing/insert.py | 81 + .../tools/text_editing/windowed_file.py | 303 ++ clients/stratus/tools/wait_tool.py | 35 + clients/stratus/weak_oracles/__init__.py | 0 clients/stratus/weak_oracles/base_oracle.py | 20 + .../weak_oracles/cluster_state_oracle.py | 98 + .../stratus/weak_oracles/workload_oracle.py | 263 + dashboard/__init__.py | 0 dashboard/assets/utils.js | 17 + dashboard/dashboard_app.py | 1053 ++++ dashboard/proxy.py | 97 + kind/.dockerignore | 1 + kind/Dockerfile | 4 + kind/README.md | 162 + kind/kind-config-arm.yaml | 29 + kind/kind-config-x86.yaml | 29 + logger/__init__.py | 78 + logger/handler.py | 42 + main.py | 289 + mcp_server/__init__.py | 0 mcp_server/configs/__init__.py | 0 mcp_server/configs/kubectl_session_cfg.py | 14 + mcp_server/configs/kubectl_tool_cfg.py | 75 + mcp_server/configs/load_all_cfg.py | 49 + mcp_server/configs/mcp_server_cfg.py | 16 + mcp_server/example.txt | 1 + mcp_server/jaeger_server.py | 139 + mcp_server/kubectl_mcp_tools.py | 108 + mcp_server/kubectl_server_helper/__init__.py | 0 .../kubectl_server_helper/action_stack.py | 51 + .../kubectl_server_helper/cmd_category.py | 78 + mcp_server/kubectl_server_helper/kubectl.py | 183 + .../kubectl_cmd_runner.py | 269 + .../kubectl_server_helper/kubectl_tool_set.py | 18 + .../kubectl_server_helper/rollback_tool.py | 299 ++ .../sliding_lru_session_cache.py | 121 + mcp_server/kubectl_server_helper/utils.py | 50 + mcp_server/prometheus_server.py | 45 + mcp_server/sregym_mcp_server.py | 30 + mcp_server/submit_server.py | 129 + mcp_server/test_client.py | 146 + mcp_server/test_server.py | 45 + mcp_server/utils.py | 73 + provisioner/README.md | 177 + provisioner/__init__.py | 0 provisioner/cli.py | 680 +++ provisioner/cloudlab_provisioner.py | 333 ++ provisioner/config/__init__.py | 0 provisioner/config/settings.py | 93 + provisioner/daemon.py | 505 ++ provisioner/setup_daemon.sh | 57 + provisioner/state_manager.py | 364 ++ provisioner/utils/__init__.py | 0 provisioner/utils/email_sender.py | 160 + provisioner/utils/logger.py | 31 + provisioner/utils/parser.py | 113 + provisioner/utils/ssh.py | 181 + pyproject.toml | 184 + scripts/__init__.py | 0 scripts/ansible/README.md | 31 + scripts/ansible/ansible.cfg | 2 + scripts/ansible/inventory.yml.example 
| 19 + scripts/ansible/setup_cluster.yml | 815 +++ scripts/ansible/ssh/hosts.txt | 3 + scripts/ansible/ssh/keys.sh | 39 + scripts/ansible/tidb/README.md | 32 + .../tidb/local-volume-provisioner.yaml | 167 + scripts/ansible/tidb/tidb-cluster.yaml | 54 + scripts/ansible/tidb/tidb-dashboard.yaml | 23 + scripts/ansible/tidb/tidb-monitor.yaml | 24 + scripts/ansible/tidb/tidb-operator.yaml | 306 ++ .../ansible/tidb/tidb_operator_cluster.yml | 107 + scripts/ansible/tidb/tidb_pv_setup.yml | 173 + scripts/geni_lib/README.md | 180 + scripts/geni_lib/__init__.py | 0 scripts/geni_lib/cluster_setup.py | 357 ++ scripts/geni_lib/generate_rspec.py | 35 + scripts/geni_lib/genictl.py | 606 +++ .../geni_lib/mod/geni_lib_xlab-1.0.0.tar.gz | Bin 0 -> 104018 bytes scripts/geni_lib/quick_k8s_experiment.py | 104 + scripts/geni_lib/remote.py | 76 + scripts/geni_lib/rspecs/test.xml | 28 + scripts/terraform/.gitignore | 33 + scripts/terraform/README.md | 87 + scripts/terraform/data.tf | 3 + scripts/terraform/deploy.py | 215 + scripts/terraform/main.tf | 197 + scripts/terraform/outputs.tf | 19 + scripts/terraform/providers.tf | 23 + scripts/terraform/scripts/kube_controller.sh | 25 + scripts/terraform/scripts/kubeadm.sh | 136 + scripts/terraform/scripts/prom_on_worker.sh | 18 + scripts/terraform/scripts/setup_sregym.sh | 40 + scripts/terraform/ssh.tf | 35 + scripts/terraform/variables.tf | 31 + sregym/__init__.py | 5 + sregym/agent_launcher.py | 61 + sregym/agent_registry.py | 41 + sregym/conductor/__init__.py | 1 + sregym/conductor/conductor.py | 562 ++ sregym/conductor/conductor_api.py | 142 + sregym/conductor/constants.py | 8 + .../assign_non_existent_node_mitigation.py | 53 + sregym/conductor/oracles/base.py | 13 + sregym/conductor/oracles/compound.py | 60 + sregym/conductor/oracles/detection.py | 30 + sregym/conductor/oracles/diagnosis_oracle.py | 547 ++ .../oracles/dns_resolution_mitigation.py | 66 + .../conductor/oracles/imbalance_mitigation.py | 49 + .../oracles/incorrect_image_mitigation.py | 33 + sregym/conductor/oracles/incorrect_port.py | 43 + .../oracles/ingress_misroute_oracle.py | 31 + .../conductor/oracles/llm_as_a_judge/judge.py | 244 + .../llm_as_a_judge/llm_as_a_judge_oracle.py | 75 + sregym/conductor/oracles/localization.py | 61 + .../oracles/missing_cm_key_mitigation.py | 51 + .../missing_env_variable_mitigation.py | 71 + sregym/conductor/oracles/mitigation.py | 40 + .../namespace_memory_limit_mitigation.py | 41 + .../oracles/network_policy_oracle.py | 29 + .../invalid_affinity_mitigation.py | 123 + .../non_existent_storage_mitigation.py | 113 + .../overload_replicas_mitigation.py | 100 + .../security_context_mitigation.py | 113 + .../wrong_update_strategy_mitigation.py | 87 + ...ling_update_misconfiguration_mitigation.py | 57 + .../oracles/rpc_retry_storm_mitigation.py | 180 + .../oracles/scale_pod_zero_mitigation.py | 55 + .../oracles/service_endpoint_mitigation.py | 32 + .../conductor/oracles/sustained_readiness.py | 88 + .../oracles/target_port_mitigation.py | 53 + sregym/conductor/oracles/utils.py | 11 + .../oracles/valkey_auth_mitigation.py | 44 + sregym/conductor/oracles/workload.py | 37 + .../conductor/oracles/wrong_bin_mitigation.py | 37 + sregym/conductor/parser.py | 182 + sregym/conductor/problems/__init__.py | 0 .../conductor/problems/ad_service_failure.py | 32 + .../conductor/problems/ad_service_high_cpu.py | 33 + .../problems/ad_service_manual_gc.py | 32 + .../problems/assign_non_existent_node.py | 47 + .../conductor/problems/auth_miss_mongodb.py | 44 + 
sregym/conductor/problems/base.py | 28 + .../capacity_decrease_rpc_retry_storm.py | 55 + .../problems/cart_service_failure.py | 32 + sregym/conductor/problems/configmap_drift.py | 69 + .../problems/duplicate_pvc_mounts.py | 55 + .../problems/env_variable_shadowing.py | 45 + .../problems/faulty_image_correlated.py | 48 + .../problems/gc_capacity_degradation.py | 60 + sregym/conductor/problems/image_slow_load.py | 33 + sregym/conductor/problems/incorrect_image.py | 46 + .../problems/incorrect_port_assignment.py | 44 + sregym/conductor/problems/ingress_misroute.py | 77 + .../problems/kafka_queue_problems.py | 32 + sregym/conductor/problems/kubelet_crash.py | 42 + .../conductor/problems/latent_sector_error.py | 312 ++ .../liveness_probe_misconfiguration.py | 58 + .../problems/liveness_probe_too_aggressive.py | 46 + .../problems/load_spike_rpc_retry_storm.py | 54 + .../problems/loadgenerator_flood_homepage.py | 33 + sregym/conductor/problems/misconfig_app.py | 43 + .../conductor/problems/missing_configmap.py | 45 + .../problems/missing_env_variable.py | 53 + sregym/conductor/problems/missing_service.py | 54 + .../conductor/problems/multiple_failures.py | 55 + .../problems/namespace_memory_limit.py | 36 + .../problems/network_policy_block.py | 50 + .../operator_misoperation/IMPORTANT.md | 2 + .../operator_misoperation/__init__.py | 5 + .../invalid_affinity_toleration.py | 46 + .../non_existent_storage.py | 44 + .../overload_replicas.py | 45 + .../security_context_fault.py | 40 + .../wrong_update_strategy.py | 44 + .../problems/payment_service_failure.py | 32 + .../problems/payment_service_unreachable.py | 32 + .../persistent_volume_affinity_violation.py | 51 + .../problems/pod_anti_affinity_deadlock.py | 61 + sregym/conductor/problems/postgres.md | 54 + .../problems/product_catalog_failure.py | 32 + .../conductor/problems/pvc_claim_mismatch.py | 46 + .../problems/rbac_misconfiguration.py | 45 + sregym/conductor/problems/read_error.py | 52 + .../readiness_probe_misconfiguration.py | 55 + .../recommendation_service_cache_failure.py | 32 + sregym/conductor/problems/registry.py | 202 + sregym/conductor/problems/resource_request.py | 91 + sregym/conductor/problems/revoke_auth.py | 47 + .../problems/rolling_update_misconfigured.py | 45 + sregym/conductor/problems/scale_pod.py | 52 + .../service_dns_resolution_failure.py | 54 + .../problems/sidecar_port_conflict.py | 53 + .../problems/silent_data_corruption.py | 249 + .../problems/stale_coredns_config.py | 54 + .../problems/storage_user_unregistered.py | 50 + .../conductor/problems/taint_no_toleration.py | 62 + sregym/conductor/problems/target_port.py | 44 + sregym/conductor/problems/train_ticket_f22.py | 51 + sregym/conductor/problems/trainticket_f17.py | 51 + .../update_incompatible_correlated.py | 54 + .../problems/valkey_auth_disruption.py | 36 + .../problems/valkey_memory_disruption.py | 33 + .../conductor/problems/workload_imbalance.py | 58 + sregym/conductor/problems/wrong_bin_usage.py | 49 + sregym/conductor/problems/wrong_dns_policy.py | 54 + .../problems/wrong_service_selector.py | 54 + sregym/conductor/tasklist.yml.example | 260 + sregym/conductor/utils.py | 4 + sregym/generators/__init__.py | 0 sregym/generators/fault/__init__.py | 0 sregym/generators/fault/base.py | 65 + .../generators/fault/custom/slow_service.py | 42 + sregym/generators/fault/helpers.py | 120 + sregym/generators/fault/inject_app.py | 472 ++ sregym/generators/fault/inject_hw.py | 294 ++ sregym/generators/fault/inject_kernel.py | 776 +++ 
sregym/generators/fault/inject_operator.py | 249 + sregym/generators/fault/inject_os.py | 85 + sregym/generators/fault/inject_otel.py | 70 + sregym/generators/fault/inject_remote_os.py | 225 + sregym/generators/fault/inject_tt.py | 174 + sregym/generators/fault/inject_virtual.py | 2128 ++++++++ .../generators/fault/script/k8s-geo-mongo.sh | 38 + .../generators/fault/script/k8s-rate-mongo.sh | 38 + .../fault/script/remove-admin-mongo.sh | 12 + .../script/remove-mitigate-admin-geo-mongo.sh | 25 + .../remove-mitigate-admin-rate-mongo.sh | 26 + .../fault/script/revoke-admin-geo-mongo.sh | 15 + .../fault/script/revoke-admin-rate-mongo.sh | 15 + .../script/revoke-mitigate-admin-geo-mongo.sh | 17 + .../revoke-mitigate-admin-rate-mongo.sh | 17 + .../noise/transient_issues/chaos_injector.py | 73 + .../noise/transient_issues/configuration.yml | 10 + .../transient_issues/transient_issues.py | 1094 ++++ sregym/generators/workload/__init__.py | 0 sregym/generators/workload/base.py | 60 + .../workload/blueprint_hotel_work.py | 303 ++ .../workload/locust-fetcher-template.yaml | 28 + sregym/generators/workload/locust.py | 204 + sregym/generators/workload/stream.py | 105 + .../generators/workload/trainticket_locust.py | 152 + .../generators/workload/wrk-job-template.yaml | 22 + sregym/generators/workload/wrk2.py | 323 ++ sregym/observer/filebeat/.helmignore | 2 + sregym/observer/filebeat/Chart.yaml | 12 + sregym/observer/filebeat/Makefile | 1 + sregym/observer/filebeat/README.md | 278 + .../filebeat/examples/default/Makefile | 13 + .../filebeat/examples/default/README.md | 27 + .../filebeat/examples/default/test/goss.yaml | 43 + .../filebeat/examples/deployment/Makefile | 13 + .../filebeat/examples/deployment/README.md | 27 + .../examples/deployment/test/goss.yaml | 9 + .../filebeat/examples/deployment/values.yaml | 9 + .../observer/filebeat/examples/oss/Makefile | 13 + .../observer/filebeat/examples/oss/README.md | 27 + .../filebeat/examples/oss/test/goss.yaml | 25 + .../filebeat/examples/oss/values.yaml | 34 + .../filebeat/examples/security/Makefile | 13 + .../filebeat/examples/security/README.md | 28 + .../filebeat/examples/security/test/goss.yaml | 9 + .../filebeat/examples/security/values.yaml | 43 + .../filebeat/examples/upgrade/Makefile | 17 + .../filebeat/examples/upgrade/README.md | 21 + .../filebeat/examples/upgrade/test/goss.yaml | 48 + .../filebeat/examples/upgrade/values.yaml | 43 + sregym/observer/filebeat/templates/NOTES.txt | 2 + .../observer/filebeat/templates/_helpers.tpl | 32 + .../filebeat/templates/clusterrole.yaml | 12 + .../templates/clusterrolebinding.yaml | 19 + .../filebeat/templates/configmap.yaml | 53 + .../filebeat/templates/daemonset.yaml | 201 + .../filebeat/templates/deployment.yaml | 157 + sregym/observer/filebeat/templates/role.yaml | 14 + .../filebeat/templates/rolebinding.yaml | 19 + .../filebeat/templates/serviceaccount.yaml | 15 + sregym/observer/filebeat/values.yaml | 279 + sregym/observer/logstash/.helmignore | 2 + sregym/observer/logstash/Chart.yaml | 12 + sregym/observer/logstash/Makefile | 1 + sregym/observer/logstash/README.md | 261 + .../logstash/examples/default/Makefile | 14 + .../logstash/examples/default/README.md | 17 + .../logstash/examples/default/test/goss.yaml | 41 + .../logstash/examples/elasticsearch/Makefile | 15 + .../logstash/examples/elasticsearch/README.md | 28 + .../examples/elasticsearch/test/goss.yaml | 58 + .../examples/elasticsearch/values.yaml | 53 + .../observer/logstash/examples/oss/Makefile | 14 + 
.../observer/logstash/examples/oss/README.md | 17 + .../logstash/examples/oss/test/goss.yaml | 40 + .../logstash/examples/oss/values.yaml | 2 + .../logstash/examples/security/Makefile | 15 + .../logstash/examples/security/README.md | 28 + .../logstash/examples/security/test/goss.yaml | 62 + .../logstash/examples/security/values.yaml | 40 + .../logstash/examples/upgrade/Makefile | 16 + .../logstash/examples/upgrade/README.md | 19 + .../logstash/examples/upgrade/test/goss.yaml | 41 + .../logstash/examples/upgrade/values.yaml | 1 + sregym/observer/logstash/jaeger/jaeger.py | 104 + sregym/observer/logstash/jaeger/jaeger.yaml | 69 + sregym/observer/logstash/templates/NOTES.txt | 2 + .../observer/logstash/templates/_helpers.tpl | 27 + .../logstash/templates/configmap-config.yaml | 17 + .../logstash/templates/configmap-pattern.yaml | 17 + .../templates/configmap-pipeline.yaml | 17 + .../observer/logstash/templates/ingress.yaml | 68 + .../templates/poddisruptionbudget.yaml | 20 + .../logstash/templates/podsecuritypolicy.yaml | 14 + sregym/observer/logstash/templates/role.yaml | 25 + .../logstash/templates/rolebinding.yaml | 20 + .../observer/logstash/templates/secret.yaml | 27 + .../logstash/templates/service-headless.yaml | 20 + .../observer/logstash/templates/service.yaml | 32 + .../logstash/templates/serviceaccount.yaml | 22 + .../logstash/templates/statefulset.yaml | 237 + sregym/observer/logstash/values.yaml | 311 ++ sregym/observer/prometheus/prometheus-pvc.yml | 11 + .../prometheus/prometheus/.helmignore | 23 + .../observer/prometheus/prometheus/Chart.yaml | 53 + .../observer/prometheus/prometheus/README.md | 382 ++ .../charts/alertmanager/.helmignore | 25 + .../prometheus/charts/alertmanager/Chart.yaml | 23 + .../prometheus/charts/alertmanager/README.md | 62 + .../alertmanager/ci/config-reload-values.yaml | 2 + .../charts/alertmanager/templates/NOTES.txt | 21 + .../alertmanager/templates/_helpers.tpl | 92 + .../alertmanager/templates/configmap.yaml | 21 + .../alertmanager/templates/ingress.yaml | 44 + .../templates/ingressperreplica.yaml | 56 + .../charts/alertmanager/templates/pdb.yaml | 14 + .../templates/serviceaccount.yaml | 14 + .../templates/serviceperreplica.yaml | 44 + .../alertmanager/templates/services.yaml | 71 + .../alertmanager/templates/statefulset.yaml | 247 + .../templates/tests/test-connection.yaml | 20 + .../__snapshot__/ingress_test.yaml.snap | 25 + .../alertmanager/unittests/ingress_test.yaml | 43 + .../charts/alertmanager/values.schema.json | 915 ++++ .../charts/alertmanager/values.yaml | 363 ++ .../charts/kube-state-metrics/.helmignore | 21 + .../charts/kube-state-metrics/Chart.yaml | 26 + .../charts/kube-state-metrics/README.md | 85 + .../kube-state-metrics/templates/NOTES.txt | 23 + .../kube-state-metrics/templates/_helpers.tpl | 156 + .../templates/ciliumnetworkpolicy.yaml | 33 + .../templates/clusterrolebinding.yaml | 20 + .../templates/crs-configmap.yaml | 16 + .../templates/deployment.yaml | 290 + .../templates/extra-manifests.yaml | 4 + .../templates/kubeconfig-secret.yaml | 12 + .../templates/networkpolicy.yaml | 43 + .../kube-state-metrics/templates/pdb.yaml | 18 + .../templates/podsecuritypolicy.yaml | 39 + .../templates/psp-clusterrole.yaml | 19 + .../templates/psp-clusterrolebinding.yaml | 16 + .../templates/rbac-configmap.yaml | 22 + .../kube-state-metrics/templates/role.yaml | 212 + .../templates/rolebinding.yaml | 24 + .../kube-state-metrics/templates/service.yaml | 49 + .../templates/serviceaccount.yaml | 15 + .../templates/servicemonitor.yaml | 120 + 
.../templates/stsdiscovery-role.yaml | 26 + .../templates/stsdiscovery-rolebinding.yaml | 17 + .../templates/verticalpodautoscaler.yaml | 44 + .../charts/kube-state-metrics/values.yaml | 456 ++ .../prometheus-blackbox-exporter/Chart.yaml | 23 + .../prometheus-blackbox-exporter/README.md | 113 + .../ci/daemonset-values.yml | 1 + .../ci/default-values.yaml | 0 .../ci/extraenv-values.yaml | 3 + .../ci/hostAliases.yml | 9 + .../ci/ingress-values.yml | 2 + .../ci/networkpolicy-values.yaml | 2 + .../ci/secret-values.yaml | 1 + .../templates/NOTES.txt | 31 + .../templates/_helpers.tpl | 248 + .../templates/configmap.yaml | 12 + .../templates/daemonset.yaml | 27 + .../templates/deployment.yaml | 30 + .../templates/extra-manifests.yaml | 4 + .../templates/ingress.yaml | 66 + .../templates/networkpolicy.yaml | 28 + .../templates/poddisruptionbudget.yaml | 18 + .../templates/podmonitoring.yaml | 49 + .../templates/podsecuritypolicy.yaml | 41 + .../templates/prometheusrule.yaml | 20 + .../templates/role.yaml | 18 + .../templates/rolebinding.yaml | 16 + .../templates/selfpodmonitoring.yaml | 32 + .../templates/selfservicemonitor.yaml | 31 + .../templates/service.yaml | 31 + .../templates/serviceaccount.yaml | 13 + .../templates/servicemonitor.yaml | 62 + .../templates/verticalpodautoscaler.yaml | 44 + .../prometheus-blackbox-exporter/values.yaml | 399 ++ .../prometheus-node-exporter/.helmignore | 21 + .../prometheus-node-exporter/Chart.yaml | 25 + .../charts/prometheus-node-exporter/README.md | 96 + .../ci/port-values.yaml | 3 + .../templates/NOTES.txt | 29 + .../templates/_helpers.tpl | 185 + .../templates/clusterrole.yaml | 19 + .../templates/clusterrolebinding.yaml | 20 + .../templates/daemonset.yaml | 285 + .../templates/endpoints.yaml | 18 + .../templates/extra-manifests.yaml | 4 + .../templates/networkpolicy.yaml | 23 + .../templates/podmonitor.yaml | 91 + .../templates/psp-clusterrole.yaml | 14 + .../templates/psp-clusterrolebinding.yaml | 16 + .../templates/psp.yaml | 49 + .../templates/rbac-configmap.yaml | 16 + .../templates/service.yaml | 29 + .../templates/serviceaccount.yaml | 17 + .../templates/servicemonitor.yaml | 61 + .../templates/verticalpodautoscaler.yaml | 40 + .../prometheus-node-exporter/values.yaml | 481 ++ .../charts/prometheus-pushgateway/.helmignore | 24 + .../charts/prometheus-pushgateway/Chart.yaml | 24 + .../charts/prometheus-pushgateway/README.md | 88 + .../templates/NOTES.txt | 19 + .../templates/_helpers.tpl | 208 + .../templates/deployment.yaml | 28 + .../templates/ingress.yaml | 50 + .../templates/networkpolicy.yaml | 26 + .../prometheus-pushgateway/templates/pdb.yaml | 14 + .../templates/pushgateway-pvc.yaml | 29 + .../templates/service.yaml | 41 + .../templates/serviceaccount.yaml | 16 + .../templates/servicemonitor.yaml | 51 + .../templates/statefulset.yaml | 49 + .../charts/prometheus-pushgateway/values.yaml | 330 ++ .../prometheus/prometheus/templates/NOTES.txt | 113 + .../prometheus/templates/_helpers.tpl | 234 + .../prometheus/templates/clusterrole.yaml | 56 + .../templates/clusterrolebinding.yaml | 16 + .../prometheus/prometheus/templates/cm.yaml | 99 + .../prometheus/templates/deploy.yaml | 360 ++ .../prometheus/templates/extra-manifests.yaml | 4 + .../prometheus/templates/headless-svc.yaml | 35 + .../prometheus/templates/ingress.yaml | 57 + .../prometheus/templates/network-policy.yaml | 16 + .../prometheus/prometheus/templates/pdb.yaml | 15 + .../prometheus/prometheus/templates/psp.yaml | 53 + .../prometheus/prometheus/templates/pvc.yaml | 43 + 
.../prometheus/templates/rolebinding.yaml | 18 + .../prometheus/templates/service.yaml | 63 + .../prometheus/templates/serviceaccount.yaml | 14 + .../prometheus/prometheus/templates/sts.yaml | 382 ++ .../prometheus/prometheus/templates/vpa.yaml | 26 + .../tmpcharts-91028/alertmanager-1.7.0.tgz | Bin 0 -> 13358 bytes .../prometheus/prometheus/values.schema.json | 738 +++ .../prometheus/prometheus/values.yaml | 896 ++++ sregym/observer/tidb_cluster_deploy_helper.py | 21 + sregym/observer/tidb_prometheus.py | 81 + sregym/observer/trace_api.py | 365 ++ sregym/paths.py | 30 + .../trainticket/locust-deployment.yaml | 72 + sregym/resources/trainticket/locustfile.py | 250 + sregym/service/__init__.py | 0 sregym/service/apps/app_registry.py | 57 + sregym/service/apps/astronomy_shop.py | 77 + sregym/service/apps/base.py | 81 + .../apps/blueprint_hotel_reservation.py | 66 + sregym/service/apps/composite_app.py | 39 + sregym/service/apps/fleet_cast.py | 271 + sregym/service/apps/flight_ticket.py | 50 + sregym/service/apps/helpers.py | 8 + sregym/service/apps/hotel_reservation.py | 152 + sregym/service/apps/social_network.py | 109 + sregym/service/apps/tidb_cluster_operator.py | 296 ++ sregym/service/apps/train_ticket.py | 163 + sregym/service/dm_dust_manager.py | 242 + sregym/service/dm_flakey_manager.py | 243 + sregym/service/helm.py | 227 + sregym/service/khaos.py | 73 + sregym/service/khaos.yaml | 128 + sregym/service/kubectl.py | 651 +++ sregym/service/metadata/__init__.py | 0 sregym/service/metadata/astronomy-shop.json | 22 + .../metadata/blueprint-hotel-reservation.json | 13 + sregym/service/metadata/fleet-cast.json | 13 + sregym/service/metadata/flight-ticket.json | 13 + .../service/metadata/hotel-reservation.json | 16 + sregym/service/metadata/prometheus.json | 17 + sregym/service/metadata/social-network.json | 19 + .../service/metadata/tidb-with-operator.json | 20 + sregym/service/metadata/tidb_metadata.json | 20 + sregym/service/metadata/train-ticket.json | 13 + sregym/service/shell.py | 34 + sregym/service/telemetry/__init__.py | 0 sregym/service/telemetry/prometheus.py | 208 + sregym/utils/__init__.py | 0 sregym/utils/actions.py | 78 + sregym/utils/cache.py | 36 + sregym/utils/decorators.py | 16 + tests/e2e-testing-scripts/auto_submit.py | 23 + tests/e2e-testing-scripts/automating_tests.py | 549 ++ tests/e2e-testing-scripts/brew.sh | 7 + tests/e2e-testing-scripts/docker.sh | 22 + .../e2e-testing-scripts/e2e_testing_readme.md | 35 + tests/e2e-testing-scripts/go.sh | 7 + tests/e2e-testing-scripts/kind.sh | 40 + tests/e2e-testing-scripts/kubectl.sh | 1 + tests/e2e-testing-scripts/registry.txt | 86 + tests/file_editing/example.py | 6 + tests/file_editing/example.txt | 2 + .../test_create_and_insert_1.yaml | 19 + tests/file_editing/test_edit_1.yaml | 13 + tests/file_editing/test_file_editing_tool.py | 62 + tests/file_editing/test_goto_line_1.yaml | 12 + tests/file_editing/test_open_file_1.yaml | 9 + tests/file_editing/test_open_file_2.yaml | 13 + tests/geni-lib/__init__.py | 0 tests/geni-lib/test_deploy_sregym.py | 44 + tests/geni-lib/test_experiment_creation.py | 78 + .../kubectl_tool_set_test.py | 124 + tests/kubectl_tool_tests/nl2kubectl_agent.py | 214 + .../tests/create_del_test.yaml | 37 + .../kubectl_tool_tests/tests/patch_test.yaml | 15 + .../tests/preconditions.yaml | 24 + .../resources/custom_nginx_deployment.yaml | 20 + tests/provisioner/test_provisioner.py | 594 +++ tests/provisioner/test_ssh.py | 79 + tests/provisioner/test_state_manager.py | 46 + tests/sre-ql/cli.py | 834 +++ 
tests/sre-ql/codeql-pack.lock.yml | 4 + tests/sre-ql/qlpack.yml | 7 + tests/sre-ql/queries/absolute_paths.ql | 17 + .../application_checks.ql | 52 + tests/sre-ql/queries/fault_recover_checks.ql | 113 + .../sre-ql/queries/problem_attributes/app.ql | 48 + .../problem_attributes/diagnosis_oracle.ql | 48 + .../problem_attributes/faultyservice.ql | 48 + .../problem_attributes/mitigation_oracle.ql | 48 + .../queries/problem_attributes/namespace.ql | 48 + tests/sre-ql/queries/problem_lifecycle.ql | 121 + uv.lock | 4686 +++++++++++++++++ 605 files changed, 60442 insertions(+) create mode 100644 .env.example create mode 100644 .github/.CODEOWNERS create mode 100644 .github/workflows/action.yml create mode 100644 .gitignore create mode 100644 .gitmodules create mode 100644 .pre-commit-config.yaml create mode 100644 .python-version create mode 100644 CONTRIBUTING.md create mode 100644 LICENSE.txt create mode 100644 Problem List.md create mode 100644 README.md create mode 160000 SREGym-applications create mode 100644 agents.yaml create mode 100644 assets/SREGymFigure.png create mode 100644 cli.py create mode 100644 clients/__init__.py create mode 100644 clients/autosubmit/autosubmit_agent.py create mode 100644 clients/stratus/README.md create mode 100644 clients/stratus/__init__.py create mode 100644 clients/stratus/configs/diagnosis_agent_config.yaml create mode 100644 clients/stratus/configs/diagnosis_agent_prompts.yaml create mode 100644 clients/stratus/configs/langgraph_tool_configs.py create mode 100644 clients/stratus/configs/llm_summarization_prompt.yaml create mode 100644 clients/stratus/configs/localization_agent_config.yaml create mode 100644 clients/stratus/configs/localization_agent_prompts.yaml create mode 100644 clients/stratus/configs/mitigation_agent_config.yaml create mode 100644 clients/stratus/configs/mitigation_agent_prompts.yaml create mode 100644 clients/stratus/configs/rollback_agent_config.yaml create mode 100644 clients/stratus/configs/rollback_agent_prompts.yaml create mode 100644 clients/stratus/configs/stratus_config.py create mode 100644 clients/stratus/llm_backend/__init__.py create mode 100644 clients/stratus/llm_backend/get_llm_backend.py create mode 100644 clients/stratus/llm_backend/init_backend.py create mode 100644 clients/stratus/llm_backend/trim_util.py create mode 100644 clients/stratus/stratus_agent/README.md create mode 100644 clients/stratus/stratus_agent/__init__.py create mode 100644 clients/stratus/stratus_agent/base_agent.py create mode 100644 clients/stratus/stratus_agent/diagnosis_agent.py create mode 100644 clients/stratus/stratus_agent/driver/__init__.py create mode 100644 clients/stratus/stratus_agent/driver/driver.py create mode 100644 clients/stratus/stratus_agent/localization_agent.py create mode 100644 clients/stratus/stratus_agent/mitigation_agent.py create mode 100644 clients/stratus/stratus_agent/rollback_agent.py create mode 100644 clients/stratus/stratus_agent/state.py create mode 100644 clients/stratus/stratus_utils/__init__.py create mode 100644 clients/stratus/stratus_utils/ai_msg_mock_utils.py create mode 100644 clients/stratus/stratus_utils/get_logger.py create mode 100644 clients/stratus/stratus_utils/get_starting_prompt.py create mode 100644 clients/stratus/stratus_utils/str_to_tool.py create mode 100644 clients/stratus/stratus_utils/truncate_by_token.py create mode 100644 clients/stratus/tools/__init__.py create mode 100644 clients/stratus/tools/basic_tool_node.py create mode 100644 clients/stratus/tools/compile/compile_tool.py create 
mode 100644 clients/stratus/tools/jaeger_tools.py create mode 100644 clients/stratus/tools/kubectl_tools.py create mode 100644 clients/stratus/tools/localization.py create mode 100644 clients/stratus/tools/prometheus_tools.py create mode 100644 clients/stratus/tools/stateful_async_tool_node.py create mode 100644 clients/stratus/tools/stratus_tool_node.py create mode 100644 clients/stratus/tools/submit_tool.py create mode 100644 clients/stratus/tools/text_editing/__init__.py create mode 100644 clients/stratus/tools/text_editing/edit.py create mode 100644 clients/stratus/tools/text_editing/file_manip.py create mode 100644 clients/stratus/tools/text_editing/flake8_utils.py create mode 100644 clients/stratus/tools/text_editing/insert.py create mode 100644 clients/stratus/tools/text_editing/windowed_file.py create mode 100644 clients/stratus/tools/wait_tool.py create mode 100644 clients/stratus/weak_oracles/__init__.py create mode 100644 clients/stratus/weak_oracles/base_oracle.py create mode 100644 clients/stratus/weak_oracles/cluster_state_oracle.py create mode 100644 clients/stratus/weak_oracles/workload_oracle.py create mode 100644 dashboard/__init__.py create mode 100644 dashboard/assets/utils.js create mode 100644 dashboard/dashboard_app.py create mode 100644 dashboard/proxy.py create mode 100644 kind/.dockerignore create mode 100644 kind/Dockerfile create mode 100644 kind/README.md create mode 100644 kind/kind-config-arm.yaml create mode 100644 kind/kind-config-x86.yaml create mode 100644 logger/__init__.py create mode 100644 logger/handler.py create mode 100644 main.py create mode 100644 mcp_server/__init__.py create mode 100644 mcp_server/configs/__init__.py create mode 100644 mcp_server/configs/kubectl_session_cfg.py create mode 100644 mcp_server/configs/kubectl_tool_cfg.py create mode 100644 mcp_server/configs/load_all_cfg.py create mode 100644 mcp_server/configs/mcp_server_cfg.py create mode 100644 mcp_server/example.txt create mode 100644 mcp_server/jaeger_server.py create mode 100644 mcp_server/kubectl_mcp_tools.py create mode 100644 mcp_server/kubectl_server_helper/__init__.py create mode 100644 mcp_server/kubectl_server_helper/action_stack.py create mode 100644 mcp_server/kubectl_server_helper/cmd_category.py create mode 100644 mcp_server/kubectl_server_helper/kubectl.py create mode 100644 mcp_server/kubectl_server_helper/kubectl_cmd_runner.py create mode 100644 mcp_server/kubectl_server_helper/kubectl_tool_set.py create mode 100644 mcp_server/kubectl_server_helper/rollback_tool.py create mode 100644 mcp_server/kubectl_server_helper/sliding_lru_session_cache.py create mode 100644 mcp_server/kubectl_server_helper/utils.py create mode 100644 mcp_server/prometheus_server.py create mode 100644 mcp_server/sregym_mcp_server.py create mode 100644 mcp_server/submit_server.py create mode 100644 mcp_server/test_client.py create mode 100644 mcp_server/test_server.py create mode 100644 mcp_server/utils.py create mode 100644 provisioner/README.md create mode 100644 provisioner/__init__.py create mode 100644 provisioner/cli.py create mode 100644 provisioner/cloudlab_provisioner.py create mode 100644 provisioner/config/__init__.py create mode 100644 provisioner/config/settings.py create mode 100755 provisioner/daemon.py create mode 100755 provisioner/setup_daemon.sh create mode 100644 provisioner/state_manager.py create mode 100644 provisioner/utils/__init__.py create mode 100644 provisioner/utils/email_sender.py create mode 100644 provisioner/utils/logger.py create mode 100644 
provisioner/utils/parser.py create mode 100644 provisioner/utils/ssh.py create mode 100644 pyproject.toml create mode 100644 scripts/__init__.py create mode 100644 scripts/ansible/README.md create mode 100644 scripts/ansible/ansible.cfg create mode 100644 scripts/ansible/inventory.yml.example create mode 100644 scripts/ansible/setup_cluster.yml create mode 100644 scripts/ansible/ssh/hosts.txt create mode 100755 scripts/ansible/ssh/keys.sh create mode 100644 scripts/ansible/tidb/README.md create mode 100644 scripts/ansible/tidb/local-volume-provisioner.yaml create mode 100644 scripts/ansible/tidb/tidb-cluster.yaml create mode 100644 scripts/ansible/tidb/tidb-dashboard.yaml create mode 100644 scripts/ansible/tidb/tidb-monitor.yaml create mode 100644 scripts/ansible/tidb/tidb-operator.yaml create mode 100644 scripts/ansible/tidb/tidb_operator_cluster.yml create mode 100644 scripts/ansible/tidb/tidb_pv_setup.yml create mode 100644 scripts/geni_lib/README.md create mode 100644 scripts/geni_lib/__init__.py create mode 100644 scripts/geni_lib/cluster_setup.py create mode 100644 scripts/geni_lib/generate_rspec.py create mode 100644 scripts/geni_lib/genictl.py create mode 100644 scripts/geni_lib/mod/geni_lib_xlab-1.0.0.tar.gz create mode 100644 scripts/geni_lib/quick_k8s_experiment.py create mode 100644 scripts/geni_lib/remote.py create mode 100644 scripts/geni_lib/rspecs/test.xml create mode 100644 scripts/terraform/.gitignore create mode 100644 scripts/terraform/README.md create mode 100644 scripts/terraform/data.tf create mode 100644 scripts/terraform/deploy.py create mode 100644 scripts/terraform/main.tf create mode 100644 scripts/terraform/outputs.tf create mode 100644 scripts/terraform/providers.tf create mode 100644 scripts/terraform/scripts/kube_controller.sh create mode 100644 scripts/terraform/scripts/kubeadm.sh create mode 100644 scripts/terraform/scripts/prom_on_worker.sh create mode 100644 scripts/terraform/scripts/setup_sregym.sh create mode 100644 scripts/terraform/ssh.tf create mode 100644 scripts/terraform/variables.tf create mode 100644 sregym/__init__.py create mode 100644 sregym/agent_launcher.py create mode 100644 sregym/agent_registry.py create mode 100644 sregym/conductor/__init__.py create mode 100644 sregym/conductor/conductor.py create mode 100644 sregym/conductor/conductor_api.py create mode 100644 sregym/conductor/constants.py create mode 100644 sregym/conductor/oracles/assign_non_existent_node_mitigation.py create mode 100644 sregym/conductor/oracles/base.py create mode 100644 sregym/conductor/oracles/compound.py create mode 100644 sregym/conductor/oracles/detection.py create mode 100644 sregym/conductor/oracles/diagnosis_oracle.py create mode 100644 sregym/conductor/oracles/dns_resolution_mitigation.py create mode 100644 sregym/conductor/oracles/imbalance_mitigation.py create mode 100644 sregym/conductor/oracles/incorrect_image_mitigation.py create mode 100644 sregym/conductor/oracles/incorrect_port.py create mode 100644 sregym/conductor/oracles/ingress_misroute_oracle.py create mode 100644 sregym/conductor/oracles/llm_as_a_judge/judge.py create mode 100644 sregym/conductor/oracles/llm_as_a_judge/llm_as_a_judge_oracle.py create mode 100644 sregym/conductor/oracles/localization.py create mode 100644 sregym/conductor/oracles/missing_cm_key_mitigation.py create mode 100644 sregym/conductor/oracles/missing_env_variable_mitigation.py create mode 100644 sregym/conductor/oracles/mitigation.py create mode 100644 sregym/conductor/oracles/namespace_memory_limit_mitigation.py 
create mode 100644 sregym/conductor/oracles/network_policy_oracle.py create mode 100644 sregym/conductor/oracles/operator_misoperation/invalid_affinity_mitigation.py create mode 100644 sregym/conductor/oracles/operator_misoperation/non_existent_storage_mitigation.py create mode 100644 sregym/conductor/oracles/operator_misoperation/overload_replicas_mitigation.py create mode 100644 sregym/conductor/oracles/operator_misoperation/security_context_mitigation.py create mode 100644 sregym/conductor/oracles/operator_misoperation/wrong_update_strategy_mitigation.py create mode 100644 sregym/conductor/oracles/rolling_update_misconfiguration_mitigation.py create mode 100644 sregym/conductor/oracles/rpc_retry_storm_mitigation.py create mode 100644 sregym/conductor/oracles/scale_pod_zero_mitigation.py create mode 100644 sregym/conductor/oracles/service_endpoint_mitigation.py create mode 100644 sregym/conductor/oracles/sustained_readiness.py create mode 100644 sregym/conductor/oracles/target_port_mitigation.py create mode 100644 sregym/conductor/oracles/utils.py create mode 100644 sregym/conductor/oracles/valkey_auth_mitigation.py create mode 100644 sregym/conductor/oracles/workload.py create mode 100644 sregym/conductor/oracles/wrong_bin_mitigation.py create mode 100644 sregym/conductor/parser.py create mode 100644 sregym/conductor/problems/__init__.py create mode 100644 sregym/conductor/problems/ad_service_failure.py create mode 100644 sregym/conductor/problems/ad_service_high_cpu.py create mode 100644 sregym/conductor/problems/ad_service_manual_gc.py create mode 100644 sregym/conductor/problems/assign_non_existent_node.py create mode 100644 sregym/conductor/problems/auth_miss_mongodb.py create mode 100644 sregym/conductor/problems/base.py create mode 100644 sregym/conductor/problems/capacity_decrease_rpc_retry_storm.py create mode 100644 sregym/conductor/problems/cart_service_failure.py create mode 100644 sregym/conductor/problems/configmap_drift.py create mode 100644 sregym/conductor/problems/duplicate_pvc_mounts.py create mode 100644 sregym/conductor/problems/env_variable_shadowing.py create mode 100644 sregym/conductor/problems/faulty_image_correlated.py create mode 100644 sregym/conductor/problems/gc_capacity_degradation.py create mode 100644 sregym/conductor/problems/image_slow_load.py create mode 100644 sregym/conductor/problems/incorrect_image.py create mode 100644 sregym/conductor/problems/incorrect_port_assignment.py create mode 100644 sregym/conductor/problems/ingress_misroute.py create mode 100644 sregym/conductor/problems/kafka_queue_problems.py create mode 100644 sregym/conductor/problems/kubelet_crash.py create mode 100644 sregym/conductor/problems/latent_sector_error.py create mode 100644 sregym/conductor/problems/liveness_probe_misconfiguration.py create mode 100644 sregym/conductor/problems/liveness_probe_too_aggressive.py create mode 100644 sregym/conductor/problems/load_spike_rpc_retry_storm.py create mode 100644 sregym/conductor/problems/loadgenerator_flood_homepage.py create mode 100644 sregym/conductor/problems/misconfig_app.py create mode 100644 sregym/conductor/problems/missing_configmap.py create mode 100644 sregym/conductor/problems/missing_env_variable.py create mode 100644 sregym/conductor/problems/missing_service.py create mode 100644 sregym/conductor/problems/multiple_failures.py create mode 100644 sregym/conductor/problems/namespace_memory_limit.py create mode 100644 sregym/conductor/problems/network_policy_block.py create mode 100644 
sregym/conductor/problems/operator_misoperation/IMPORTANT.md create mode 100644 sregym/conductor/problems/operator_misoperation/__init__.py create mode 100644 sregym/conductor/problems/operator_misoperation/invalid_affinity_toleration.py create mode 100644 sregym/conductor/problems/operator_misoperation/non_existent_storage.py create mode 100644 sregym/conductor/problems/operator_misoperation/overload_replicas.py create mode 100644 sregym/conductor/problems/operator_misoperation/security_context_fault.py create mode 100644 sregym/conductor/problems/operator_misoperation/wrong_update_strategy.py create mode 100644 sregym/conductor/problems/payment_service_failure.py create mode 100644 sregym/conductor/problems/payment_service_unreachable.py create mode 100644 sregym/conductor/problems/persistent_volume_affinity_violation.py create mode 100644 sregym/conductor/problems/pod_anti_affinity_deadlock.py create mode 100644 sregym/conductor/problems/postgres.md create mode 100644 sregym/conductor/problems/product_catalog_failure.py create mode 100644 sregym/conductor/problems/pvc_claim_mismatch.py create mode 100644 sregym/conductor/problems/rbac_misconfiguration.py create mode 100644 sregym/conductor/problems/read_error.py create mode 100644 sregym/conductor/problems/readiness_probe_misconfiguration.py create mode 100644 sregym/conductor/problems/recommendation_service_cache_failure.py create mode 100644 sregym/conductor/problems/registry.py create mode 100644 sregym/conductor/problems/resource_request.py create mode 100644 sregym/conductor/problems/revoke_auth.py create mode 100644 sregym/conductor/problems/rolling_update_misconfigured.py create mode 100644 sregym/conductor/problems/scale_pod.py create mode 100644 sregym/conductor/problems/service_dns_resolution_failure.py create mode 100644 sregym/conductor/problems/sidecar_port_conflict.py create mode 100644 sregym/conductor/problems/silent_data_corruption.py create mode 100644 sregym/conductor/problems/stale_coredns_config.py create mode 100644 sregym/conductor/problems/storage_user_unregistered.py create mode 100644 sregym/conductor/problems/taint_no_toleration.py create mode 100644 sregym/conductor/problems/target_port.py create mode 100644 sregym/conductor/problems/train_ticket_f22.py create mode 100644 sregym/conductor/problems/trainticket_f17.py create mode 100644 sregym/conductor/problems/update_incompatible_correlated.py create mode 100644 sregym/conductor/problems/valkey_auth_disruption.py create mode 100644 sregym/conductor/problems/valkey_memory_disruption.py create mode 100644 sregym/conductor/problems/workload_imbalance.py create mode 100644 sregym/conductor/problems/wrong_bin_usage.py create mode 100644 sregym/conductor/problems/wrong_dns_policy.py create mode 100644 sregym/conductor/problems/wrong_service_selector.py create mode 100644 sregym/conductor/tasklist.yml.example create mode 100644 sregym/conductor/utils.py create mode 100644 sregym/generators/__init__.py create mode 100644 sregym/generators/fault/__init__.py create mode 100644 sregym/generators/fault/base.py create mode 100644 sregym/generators/fault/custom/slow_service.py create mode 100644 sregym/generators/fault/helpers.py create mode 100644 sregym/generators/fault/inject_app.py create mode 100644 sregym/generators/fault/inject_hw.py create mode 100644 sregym/generators/fault/inject_kernel.py create mode 100644 sregym/generators/fault/inject_operator.py create mode 100644 sregym/generators/fault/inject_os.py create mode 100644 sregym/generators/fault/inject_otel.py 
create mode 100644 sregym/generators/fault/inject_remote_os.py create mode 100644 sregym/generators/fault/inject_tt.py create mode 100644 sregym/generators/fault/inject_virtual.py create mode 100644 sregym/generators/fault/script/k8s-geo-mongo.sh create mode 100644 sregym/generators/fault/script/k8s-rate-mongo.sh create mode 100644 sregym/generators/fault/script/remove-admin-mongo.sh create mode 100644 sregym/generators/fault/script/remove-mitigate-admin-geo-mongo.sh create mode 100644 sregym/generators/fault/script/remove-mitigate-admin-rate-mongo.sh create mode 100644 sregym/generators/fault/script/revoke-admin-geo-mongo.sh create mode 100644 sregym/generators/fault/script/revoke-admin-rate-mongo.sh create mode 100644 sregym/generators/fault/script/revoke-mitigate-admin-geo-mongo.sh create mode 100644 sregym/generators/fault/script/revoke-mitigate-admin-rate-mongo.sh create mode 100644 sregym/generators/noise/transient_issues/chaos_injector.py create mode 100644 sregym/generators/noise/transient_issues/configuration.yml create mode 100644 sregym/generators/noise/transient_issues/transient_issues.py create mode 100644 sregym/generators/workload/__init__.py create mode 100644 sregym/generators/workload/base.py create mode 100644 sregym/generators/workload/blueprint_hotel_work.py create mode 100644 sregym/generators/workload/locust-fetcher-template.yaml create mode 100644 sregym/generators/workload/locust.py create mode 100644 sregym/generators/workload/stream.py create mode 100644 sregym/generators/workload/trainticket_locust.py create mode 100644 sregym/generators/workload/wrk-job-template.yaml create mode 100644 sregym/generators/workload/wrk2.py create mode 100644 sregym/observer/filebeat/.helmignore create mode 100644 sregym/observer/filebeat/Chart.yaml create mode 100644 sregym/observer/filebeat/Makefile create mode 100644 sregym/observer/filebeat/README.md create mode 100644 sregym/observer/filebeat/examples/default/Makefile create mode 100644 sregym/observer/filebeat/examples/default/README.md create mode 100644 sregym/observer/filebeat/examples/default/test/goss.yaml create mode 100644 sregym/observer/filebeat/examples/deployment/Makefile create mode 100644 sregym/observer/filebeat/examples/deployment/README.md create mode 100644 sregym/observer/filebeat/examples/deployment/test/goss.yaml create mode 100644 sregym/observer/filebeat/examples/deployment/values.yaml create mode 100644 sregym/observer/filebeat/examples/oss/Makefile create mode 100644 sregym/observer/filebeat/examples/oss/README.md create mode 100644 sregym/observer/filebeat/examples/oss/test/goss.yaml create mode 100644 sregym/observer/filebeat/examples/oss/values.yaml create mode 100644 sregym/observer/filebeat/examples/security/Makefile create mode 100644 sregym/observer/filebeat/examples/security/README.md create mode 100644 sregym/observer/filebeat/examples/security/test/goss.yaml create mode 100644 sregym/observer/filebeat/examples/security/values.yaml create mode 100644 sregym/observer/filebeat/examples/upgrade/Makefile create mode 100644 sregym/observer/filebeat/examples/upgrade/README.md create mode 100644 sregym/observer/filebeat/examples/upgrade/test/goss.yaml create mode 100644 sregym/observer/filebeat/examples/upgrade/values.yaml create mode 100644 sregym/observer/filebeat/templates/NOTES.txt create mode 100644 sregym/observer/filebeat/templates/_helpers.tpl create mode 100644 sregym/observer/filebeat/templates/clusterrole.yaml create mode 100644 sregym/observer/filebeat/templates/clusterrolebinding.yaml 
create mode 100644 sregym/observer/filebeat/templates/configmap.yaml create mode 100644 sregym/observer/filebeat/templates/daemonset.yaml create mode 100644 sregym/observer/filebeat/templates/deployment.yaml create mode 100644 sregym/observer/filebeat/templates/role.yaml create mode 100644 sregym/observer/filebeat/templates/rolebinding.yaml create mode 100644 sregym/observer/filebeat/templates/serviceaccount.yaml create mode 100644 sregym/observer/filebeat/values.yaml create mode 100644 sregym/observer/logstash/.helmignore create mode 100644 sregym/observer/logstash/Chart.yaml create mode 100644 sregym/observer/logstash/Makefile create mode 100644 sregym/observer/logstash/README.md create mode 100644 sregym/observer/logstash/examples/default/Makefile create mode 100644 sregym/observer/logstash/examples/default/README.md create mode 100644 sregym/observer/logstash/examples/default/test/goss.yaml create mode 100644 sregym/observer/logstash/examples/elasticsearch/Makefile create mode 100644 sregym/observer/logstash/examples/elasticsearch/README.md create mode 100644 sregym/observer/logstash/examples/elasticsearch/test/goss.yaml create mode 100644 sregym/observer/logstash/examples/elasticsearch/values.yaml create mode 100644 sregym/observer/logstash/examples/oss/Makefile create mode 100644 sregym/observer/logstash/examples/oss/README.md create mode 100644 sregym/observer/logstash/examples/oss/test/goss.yaml create mode 100644 sregym/observer/logstash/examples/oss/values.yaml create mode 100644 sregym/observer/logstash/examples/security/Makefile create mode 100644 sregym/observer/logstash/examples/security/README.md create mode 100644 sregym/observer/logstash/examples/security/test/goss.yaml create mode 100644 sregym/observer/logstash/examples/security/values.yaml create mode 100644 sregym/observer/logstash/examples/upgrade/Makefile create mode 100644 sregym/observer/logstash/examples/upgrade/README.md create mode 100644 sregym/observer/logstash/examples/upgrade/test/goss.yaml create mode 100644 sregym/observer/logstash/examples/upgrade/values.yaml create mode 100644 sregym/observer/logstash/jaeger/jaeger.py create mode 100644 sregym/observer/logstash/jaeger/jaeger.yaml create mode 100644 sregym/observer/logstash/templates/NOTES.txt create mode 100644 sregym/observer/logstash/templates/_helpers.tpl create mode 100644 sregym/observer/logstash/templates/configmap-config.yaml create mode 100644 sregym/observer/logstash/templates/configmap-pattern.yaml create mode 100644 sregym/observer/logstash/templates/configmap-pipeline.yaml create mode 100644 sregym/observer/logstash/templates/ingress.yaml create mode 100644 sregym/observer/logstash/templates/poddisruptionbudget.yaml create mode 100644 sregym/observer/logstash/templates/podsecuritypolicy.yaml create mode 100644 sregym/observer/logstash/templates/role.yaml create mode 100644 sregym/observer/logstash/templates/rolebinding.yaml create mode 100644 sregym/observer/logstash/templates/secret.yaml create mode 100644 sregym/observer/logstash/templates/service-headless.yaml create mode 100644 sregym/observer/logstash/templates/service.yaml create mode 100644 sregym/observer/logstash/templates/serviceaccount.yaml create mode 100644 sregym/observer/logstash/templates/statefulset.yaml create mode 100644 sregym/observer/logstash/values.yaml create mode 100644 sregym/observer/prometheus/prometheus-pvc.yml create mode 100644 sregym/observer/prometheus/prometheus/.helmignore create mode 100644 sregym/observer/prometheus/prometheus/Chart.yaml create mode 100644 
sregym/observer/prometheus/prometheus/README.md create mode 100644 sregym/observer/prometheus/prometheus/charts/alertmanager/.helmignore create mode 100644 sregym/observer/prometheus/prometheus/charts/alertmanager/Chart.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/alertmanager/README.md create mode 100644 sregym/observer/prometheus/prometheus/charts/alertmanager/ci/config-reload-values.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/alertmanager/templates/NOTES.txt create mode 100644 sregym/observer/prometheus/prometheus/charts/alertmanager/templates/_helpers.tpl create mode 100644 sregym/observer/prometheus/prometheus/charts/alertmanager/templates/configmap.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/alertmanager/templates/ingress.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/alertmanager/templates/ingressperreplica.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/alertmanager/templates/pdb.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/alertmanager/templates/serviceaccount.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/alertmanager/templates/serviceperreplica.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/alertmanager/templates/services.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/alertmanager/templates/statefulset.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/alertmanager/templates/tests/test-connection.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/alertmanager/unittests/__snapshot__/ingress_test.yaml.snap create mode 100644 sregym/observer/prometheus/prometheus/charts/alertmanager/unittests/ingress_test.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/alertmanager/values.schema.json create mode 100644 sregym/observer/prometheus/prometheus/charts/alertmanager/values.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/kube-state-metrics/.helmignore create mode 100644 sregym/observer/prometheus/prometheus/charts/kube-state-metrics/Chart.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/kube-state-metrics/README.md create mode 100644 sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/NOTES.txt create mode 100644 sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/_helpers.tpl create mode 100644 sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/ciliumnetworkpolicy.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/clusterrolebinding.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/crs-configmap.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/deployment.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/extra-manifests.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/kubeconfig-secret.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/networkpolicy.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/pdb.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/podsecuritypolicy.yaml create mode 100644 
sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/psp-clusterrole.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/psp-clusterrolebinding.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/rbac-configmap.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/role.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/rolebinding.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/service.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/serviceaccount.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/servicemonitor.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/stsdiscovery-role.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/stsdiscovery-rolebinding.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/verticalpodautoscaler.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/kube-state-metrics/values.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/Chart.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/README.md create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/ci/daemonset-values.yml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/ci/default-values.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/ci/extraenv-values.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/ci/hostAliases.yml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/ci/ingress-values.yml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/ci/networkpolicy-values.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/ci/secret-values.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/NOTES.txt create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/_helpers.tpl create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/configmap.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/daemonset.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/deployment.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/extra-manifests.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/ingress.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/networkpolicy.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/poddisruptionbudget.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/podmonitoring.yaml create 
mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/podsecuritypolicy.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/prometheusrule.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/role.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/rolebinding.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/selfpodmonitoring.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/selfservicemonitor.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/service.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/serviceaccount.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/servicemonitor.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/verticalpodautoscaler.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/values.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/.helmignore create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/Chart.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/README.md create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/ci/port-values.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/NOTES.txt create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/_helpers.tpl create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/clusterrole.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/clusterrolebinding.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/daemonset.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/endpoints.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/extra-manifests.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/networkpolicy.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/podmonitor.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/psp-clusterrole.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/psp-clusterrolebinding.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/psp.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/rbac-configmap.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/service.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/serviceaccount.yaml create mode 100644 
sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/servicemonitor.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/verticalpodautoscaler.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/values.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/.helmignore create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/Chart.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/README.md create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/templates/NOTES.txt create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/templates/_helpers.tpl create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/templates/deployment.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/templates/ingress.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/templates/networkpolicy.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/templates/pdb.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/templates/pushgateway-pvc.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/templates/service.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/templates/serviceaccount.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/templates/servicemonitor.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/templates/statefulset.yaml create mode 100644 sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/values.yaml create mode 100644 sregym/observer/prometheus/prometheus/templates/NOTES.txt create mode 100644 sregym/observer/prometheus/prometheus/templates/_helpers.tpl create mode 100644 sregym/observer/prometheus/prometheus/templates/clusterrole.yaml create mode 100644 sregym/observer/prometheus/prometheus/templates/clusterrolebinding.yaml create mode 100644 sregym/observer/prometheus/prometheus/templates/cm.yaml create mode 100644 sregym/observer/prometheus/prometheus/templates/deploy.yaml create mode 100644 sregym/observer/prometheus/prometheus/templates/extra-manifests.yaml create mode 100644 sregym/observer/prometheus/prometheus/templates/headless-svc.yaml create mode 100644 sregym/observer/prometheus/prometheus/templates/ingress.yaml create mode 100644 sregym/observer/prometheus/prometheus/templates/network-policy.yaml create mode 100644 sregym/observer/prometheus/prometheus/templates/pdb.yaml create mode 100644 sregym/observer/prometheus/prometheus/templates/psp.yaml create mode 100644 sregym/observer/prometheus/prometheus/templates/pvc.yaml create mode 100644 sregym/observer/prometheus/prometheus/templates/rolebinding.yaml create mode 100644 sregym/observer/prometheus/prometheus/templates/service.yaml create mode 100644 sregym/observer/prometheus/prometheus/templates/serviceaccount.yaml create mode 100644 sregym/observer/prometheus/prometheus/templates/sts.yaml create mode 100644 sregym/observer/prometheus/prometheus/templates/vpa.yaml create mode 100644 sregym/observer/prometheus/prometheus/tmpcharts-91028/alertmanager-1.7.0.tgz create mode 100644 
sregym/observer/prometheus/prometheus/values.schema.json create mode 100644 sregym/observer/prometheus/prometheus/values.yaml create mode 100644 sregym/observer/tidb_cluster_deploy_helper.py create mode 100644 sregym/observer/tidb_prometheus.py create mode 100644 sregym/observer/trace_api.py create mode 100644 sregym/paths.py create mode 100644 sregym/resources/trainticket/locust-deployment.yaml create mode 100644 sregym/resources/trainticket/locustfile.py create mode 100644 sregym/service/__init__.py create mode 100644 sregym/service/apps/app_registry.py create mode 100644 sregym/service/apps/astronomy_shop.py create mode 100644 sregym/service/apps/base.py create mode 100644 sregym/service/apps/blueprint_hotel_reservation.py create mode 100644 sregym/service/apps/composite_app.py create mode 100644 sregym/service/apps/fleet_cast.py create mode 100644 sregym/service/apps/flight_ticket.py create mode 100644 sregym/service/apps/helpers.py create mode 100644 sregym/service/apps/hotel_reservation.py create mode 100644 sregym/service/apps/social_network.py create mode 100644 sregym/service/apps/tidb_cluster_operator.py create mode 100644 sregym/service/apps/train_ticket.py create mode 100644 sregym/service/dm_dust_manager.py create mode 100644 sregym/service/dm_flakey_manager.py create mode 100644 sregym/service/helm.py create mode 100644 sregym/service/khaos.py create mode 100644 sregym/service/khaos.yaml create mode 100644 sregym/service/kubectl.py create mode 100644 sregym/service/metadata/__init__.py create mode 100644 sregym/service/metadata/astronomy-shop.json create mode 100644 sregym/service/metadata/blueprint-hotel-reservation.json create mode 100644 sregym/service/metadata/fleet-cast.json create mode 100644 sregym/service/metadata/flight-ticket.json create mode 100644 sregym/service/metadata/hotel-reservation.json create mode 100644 sregym/service/metadata/prometheus.json create mode 100644 sregym/service/metadata/social-network.json create mode 100644 sregym/service/metadata/tidb-with-operator.json create mode 100644 sregym/service/metadata/tidb_metadata.json create mode 100644 sregym/service/metadata/train-ticket.json create mode 100644 sregym/service/shell.py create mode 100644 sregym/service/telemetry/__init__.py create mode 100644 sregym/service/telemetry/prometheus.py create mode 100644 sregym/utils/__init__.py create mode 100644 sregym/utils/actions.py create mode 100644 sregym/utils/cache.py create mode 100644 sregym/utils/decorators.py create mode 100644 tests/e2e-testing-scripts/auto_submit.py create mode 100644 tests/e2e-testing-scripts/automating_tests.py create mode 100644 tests/e2e-testing-scripts/brew.sh create mode 100644 tests/e2e-testing-scripts/docker.sh create mode 100644 tests/e2e-testing-scripts/e2e_testing_readme.md create mode 100644 tests/e2e-testing-scripts/go.sh create mode 100644 tests/e2e-testing-scripts/kind.sh create mode 100644 tests/e2e-testing-scripts/kubectl.sh create mode 100644 tests/e2e-testing-scripts/registry.txt create mode 100644 tests/file_editing/example.py create mode 100644 tests/file_editing/example.txt create mode 100644 tests/file_editing/test_create_and_insert_1.yaml create mode 100644 tests/file_editing/test_edit_1.yaml create mode 100644 tests/file_editing/test_file_editing_tool.py create mode 100644 tests/file_editing/test_goto_line_1.yaml create mode 100644 tests/file_editing/test_open_file_1.yaml create mode 100644 tests/file_editing/test_open_file_2.yaml create mode 100644 tests/geni-lib/__init__.py create mode 100644 
tests/geni-lib/test_deploy_sregym.py create mode 100644 tests/geni-lib/test_experiment_creation.py create mode 100644 tests/kubectl_tool_tests/kubectl_tool_set_test.py create mode 100644 tests/kubectl_tool_tests/nl2kubectl_agent.py create mode 100644 tests/kubectl_tool_tests/tests/create_del_test.yaml create mode 100644 tests/kubectl_tool_tests/tests/patch_test.yaml create mode 100644 tests/kubectl_tool_tests/tests/preconditions.yaml create mode 100644 tests/kubectl_tool_tests/tests/resources/custom_nginx_deployment.yaml create mode 100644 tests/provisioner/test_provisioner.py create mode 100644 tests/provisioner/test_ssh.py create mode 100644 tests/provisioner/test_state_manager.py create mode 100755 tests/sre-ql/cli.py create mode 100644 tests/sre-ql/codeql-pack.lock.yml create mode 100644 tests/sre-ql/qlpack.yml create mode 100644 tests/sre-ql/queries/absolute_paths.ql create mode 100644 tests/sre-ql/queries/application_attributes/application_checks.ql create mode 100644 tests/sre-ql/queries/fault_recover_checks.ql create mode 100644 tests/sre-ql/queries/problem_attributes/app.ql create mode 100644 tests/sre-ql/queries/problem_attributes/diagnosis_oracle.ql create mode 100644 tests/sre-ql/queries/problem_attributes/faultyservice.ql create mode 100644 tests/sre-ql/queries/problem_attributes/mitigation_oracle.ql create mode 100644 tests/sre-ql/queries/problem_attributes/namespace.ql create mode 100644 tests/sre-ql/queries/problem_lifecycle.ql create mode 100644 uv.lock diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..8a1a631 --- /dev/null +++ b/.env.example @@ -0,0 +1,64 @@ +# Submission API +API_HOSTNAME="0.0.0.0" +API_PORT="8000" + +# MCP Server Setting +MCP_SERVER_PORT=8001 +EXPOSE_SERVER=False +# The maximum number of sessions that can be held; the least-recently-used session will be evicted if the cache overflows +SESSION_CACHE_SIZE=10000 +# The time after which a session is considered inactive if there has been no interaction with the server; +# Cache entries for inactive sessions are deleted on the next cache-mutating operation +SESSION_TTL=600 + +# Timeout in seconds for waiting for pods to become ready during deployment. +# Increase this value if you have slow network connections (e.g., first deployment). +# Default: 600 (10 minutes) +# Recommended for slow networks: 1800 (30 minutes) +WAIT_FOR_POD_READY_TIMEOUT=600 + +# LangGraph Tool Setting +MCP_SERVER_URL=http://127.0.0.1:${MCP_SERVER_PORT} + +# Agent LLM Config +# Maximum number of retries for API calls +LLM_QUERY_MAX_RETRIES=5 +# Initial delay in seconds for retries +LLM_QUERY_INIT_RETRY_DELAY=1 + + +### LLM Config1: LiteLLM ### +# PROVIDER_TOOLS="litellm" +# PROVIDER="litellm" + +# MODEL_TOOLS="gemini/gemini-2.5-flash" +# MODEL_TOOLS="openai/gpt-4o" +# MODEL_TOOLS="anthropic/claude-sonnet-4-20250514" # this one is OK, but the model name given in the LiteLLM docs seems to be invalid.
+# MODEL_TOOLS="bedrock/meta.llama3-1-70b-instruct-v1:0" + +# GEMINI_API_KEY="AIHaveFreeFood_LotsOfIt" +# OPENAI_API_KEY="sk-proj-HaveSleep_LotsOfIt" +# ANTHROPIC_API_KEY="sk-ant-api03-HaveCats_LotsOfIt_Meow" + +# AWS_PROFILE="default" +# AWS_DEFAULT_REGION=us-east-2 + + +### LLM Config2: WatsonX ### +# PROVIDER_TOOLS="watsonx" +# PROVIDER="watsonx" +# MODEL_TOOLS="meta-llama/llama-3-3-70b-instruct" +# URL_TOOLS="https://us-south.ml.cloud.ibm.com" +# API_KEY_TOOLS="HaveCornsLotsOfIt" +# WATSONX_API_BASE="https://us-south.ml.cloud.ibm.com" +# WX_PROJECT_ID="fe3d8da2-be7e-41b2-8d92-f0e855869e28" +# WATSONX_API_KEY="HaveCornsLotsOfIt" + +### LLM Config3: LiteLLM not supported but OpenAI API compatible ### +# PROVIDER="compatible" +# PROVIDER_TOOLS="compatible" + +# MODEL_TOOLS="openai/glm-4" # you should use 'openai' as the provider to reuse the interface +# API_KEY_TOOLS="HaveFunding_LotsOfIt" +# URL_TOOLS="https://open.bigmodel.cn/api/paas/v4/" # the base url of the model, taking Zhipu GLM as the example + diff --git a/.github/.CODEOWNERS b/.github/.CODEOWNERS new file mode 100644 index 0000000..6c3c576 --- /dev/null +++ b/.github/.CODEOWNERS @@ -0,0 +1 @@ +* @SREGym/SREGym-leadership \ No newline at end of file diff --git a/.github/workflows/action.yml b/.github/workflows/action.yml new file mode 100644 index 0000000..fee558d --- /dev/null +++ b/.github/workflows/action.yml @@ -0,0 +1,85 @@ +name: Run SRE-QL Queries +on: + push: + pull_request: +jobs: + codeql-custom: + runs-on: ubuntu-latest + permissions: + contents: read + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Download CodeQL CLI v2.23.5 + run: | + curl -L https://github.com/github/codeql-cli-binaries/releases/download/v2.23.5/codeql-linux64.zip -o codeql.zip + unzip codeql.zip + mv codeql /opt/codeql + echo "/opt/codeql" >> $GITHUB_PATH + + - name: Verify CodeQL Installation + run: | + codeql --version + + - name: Install CodeQL Packs + run: | + cd tests/sre-ql + codeql pack install + cd ../.. + + - name: Create CodeQL DB + run: | + codeql database create sreql-db \ + --language=python \ + --source-root=. \ + --overwrite + + - name: Run Queries (CSV, skipping problem_lifecycle.ql) + run: | + QUERIES=$(find tests/sre-ql/queries -name "*.ql" ! -name "problem_lifecycle.ql") + echo "Running queries:" + echo "$QUERIES" + if [ -z "$QUERIES" ]; then + echo "No queries found!" 
+ exit 1 + fi + codeql database analyze sreql-db \ + $QUERIES \ + --format=csv \ + --output=results.csv \ + --sarif-category=sre-ql \ + --verbose + + - name: Display Results Summary + if: always() + run: | + if [ -f results.csv ]; then + echo "Query results:" + head -n 20 results.csv + echo "---" + wc -l results.csv + else + echo "No results file generated" + fi + + - name: Upload CSV + if: always() + uses: actions/upload-artifact@v4 + with: + name: sreql-results-csv + path: results.csv + if-no-files-found: warn + + - name: Check for Critical Issues + if: success() + run: | + if [ -f results.csv ]; then + # Count non-header lines (issues found) + ISSUE_COUNT=$(tail -n +2 results.csv | wc -l) + echo "Found $ISSUE_COUNT issues" + if [ "$ISSUE_COUNT" -gt 0 ]; then + echo "::warning::Found $ISSUE_COUNT SRE reliability issues" + exit 1 + fi + fi diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a66164c --- /dev/null +++ b/.gitignore @@ -0,0 +1,768 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +# Visual Studio Code +.vscode/ + +# Weight & Biases +wandb/ + +# Project specific +cache_dir +demos +data +taskweaver_proj +trace_output/ +metrics_output/ + +# Ignore telemetry data directories +sregym/observer/telemetry_data_*/ + +# Ignore all dataset functional directories +dataset/functional/**/groundtruth/ +dataset/functional/**/log/ +dataset/functional/**/metric/ +dataset/functional/**/trace/ + +# Ignore Prometheus setup file +.DS_Store +sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway-2.4.2.tgz +sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter-4.23.2.tgz +sregym/observer/prometheus/prometheus/charts/kube-state-metrics-5.15.3.tgz +sregym/observer/prometheus/prometheus/Chart.lock +sregym/observer/prometheus/prometheus/charts/alertmanager-1.7.0.tgz +sregym/observer/prometheus/prometheus/Chart.lock +sregym/observer/prometheus/prometheus/Chart.lock + +# Ignore customized config files +sregym/config.yml +scripts/ansible/inventory.yml + +# Ignore cloudlab credentials +*.pem + +# Ignore geni-lib login info +*.login.info.txt + +# Ignore JetBrains IDE files +.idea + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
+# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. 
+#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +# Visual Studio Code +.vscode/ + +# Weight & Biases +wandb/ + +# Project specific +cache_dir +demos +data +taskweaver_proj +trace_output/ +metrics_output/ + +# Ignore telemetry data directories +sregym/observer/telemetry_data_*/ + +# Ignore all dataset functional directories +dataset/functional/**/groundtruth/ +dataset/functional/**/log/ +dataset/functional/**/metric/ +dataset/functional/**/trace/ + +# Ignore Prometheus setup file +.DS_Store +sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway-2.4.2.tgz +sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter-4.23.2.tgz +sregym/observer/prometheus/prometheus/charts/kube-state-metrics-5.15.3.tgz +sregym/observer/prometheus/prometheus/Chart.lock +sregym/observer/prometheus/prometheus/charts/alertmanager-1.7.0.tgz +sregym/observer/prometheus/prometheus/Chart.lock +sregym/observer/prometheus/prometheus/Chart.lock + +# Ignore customized config files +sregym/config.yml +scripts/ansible/inventory.yml + +# Ignore cloudlab credentials +*.pem + +# Ignore geni-lib login info +*.login.info.txt +*.experiment.info.json + +# Ignore geni-lib context file +context.json + +# Ignore sqlite3 database +*.sqlite3 + +# Ignore provisioner ssh keys +provisioner_ssh_key +provisioner_ssh_key.pub +agent_graph.png + +# Ignore tasklist +sregym/conductor/tasklist.yml + +# HTMLs +*.html +*.csv +kubeconfig.yaml + +# Ignore IDE relatedfiles/folders + +CLAUDE.md +.cursor/ + +# Ignore CodeQL database +python-database diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..8e15d8d --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "SREGym-applications"] + path = SREGym-applications + url = https://github.com/SREGym/SREGym-applications.git diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..a281577 --- /dev/null +++ b/.pre-commit-config.yaml @@ 
-0,0 +1,11 @@ +repos: + - repo: https://github.com/psf/black + rev: 25.1.0 + hooks: + - id: black + language_version: python3 + - repo: https://github.com/PyCQA/isort + rev: 6.0.1 + hooks: + - id: isort + name: isort (python) \ No newline at end of file diff --git a/.python-version b/.python-version new file mode 100644 index 0000000..e4fba21 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.12 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..433b6f2 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,326 @@ +# Contributing to SREGym +## Table of Contents + +- [Contributing to SREGym](#contributing-to-sregym) + - [Table of Contents](#table-of-contents) + - [Getting Started](#getting-started) + - [Development Setup](#development-setup) + - [Prerequisites](#prerequisites) + - [Installation](#installation) + - [How to Contribute](#how-to-contribute) + - [Reporting Bugs](#reporting-bugs) + - [Suggesting Features](#suggesting-features) + - [Code Contributions](#code-contributions) + - [Development Guidelines](#development-guidelines) + - [Code Style](#code-style) + - [Python Code Style](#python-code-style) + - [Running Code Formatters](#running-code-formatters) + - [Code Quality Best Practices](#code-quality-best-practices) + - [Commit Messages](#commit-messages) + - [Adding New Components](#adding-new-components) + - [Adding New Applications](#adding-new-applications) + - [Adding New Problems](#adding-new-problems) + - [Adding New Oracles](#adding-new-oracles) + - [Pull Request Process](#pull-request-process) + - [PR Checklist](#pr-checklist) + - [Community](#community) + - [Getting Help](#getting-help) + - [Staying Updated](#staying-updated) + - [License](#license) + +## Getting Started + +Before contributing, please: + +1. Read the [README.md](README.md) to understand the project +2. Review the [Problem List](https://docs.google.com/spreadsheets/d/1FGIeLNcKsHjrZGQ_VJcQRGl6oTmYyzjW0_ve5tfM_eg/edit?usp=sharing) +3. Join our [Slack community](https://join.slack.com/t/SREGym/shared_invite/zt-3gvqxpkpc-RvCUcyBEMvzvXaQS9KtS_w) +4. Check existing [issues](https://github.com/SREGym/SREGym/issues) to avoid duplicates + +## Development Setup + +### Prerequisites + +- Python >= 3.12 +- [Helm](https://helm.sh/) +- [brew](https://docs.brew.sh/Homebrew-and-Python) +- [kubectl](https://kubernetes.io/docs/tasks/tools/) +- [uv](https://github.com/astral-sh/uv) (recommended for dependency management) + +### Installation + +1. Fork the repository on GitHub + +2. Clone your fork with submodules: + ```bash + git clone --recurse-submodules https://github.com/YOUR_USERNAME/SREGym + cd SREGym + ``` + +3. Install dependencies: + ```bash + uv sync + ``` + +4. Install pre-commit hooks: + ```bash + uv run pre-commit install + ``` + +5. Set up your cluster (see [README.md](README.md#🚀quickstart) for options) + +## How to Contribute + +### Reporting Bugs + +When reporting bugs, please include: + +- **Clear description** of the issue +- **Steps to reproduce** the problem +- **Expected behavior** vs **actual behavior** +- **Environment details** (OS, Python version, cluster type) +- **Relevant logs or error messages** +- **Problem ID** if the issue is specific to a particular problem + +Use the GitHub issue tracker with the "bug" label. + +### Suggesting Features + +We welcome feature suggestions! 
Please: + +- Check if the feature has already been requested +- Describe the use case and expected behavior +- Explain why this feature would be valuable +- Consider whether it aligns with the project's goals + +Use the GitHub issue tracker with the "enhancement" label. + +### Code Contributions + +We welcome contributions including: + +- Bug fixes +- New applications +- New problems +- New oracles or evaluation metrics +- MCP server enhancements +- Documentation improvements +- Test coverage improvements +- Performance optimizations + +## Development Guidelines + +### Code Style + +SREGym follows code style guidelines enforced by pre-commit hooks: + +#### Python Code Style + +- **Formatter**: [Black](https://github.com/psf/black) with 120 character line length +- **Import sorting**: [isort](https://pycqa.github.io/isort/) with Black profile +- **Target Python version**: 3.12 + +Configuration is in `pyproject.toml`: +```toml +[tool.black] +line-length = 120 +target-version = ["py312"] + +[tool.isort] +profile = "black" +line_length = 120 +``` + +#### Running Code Formatters + +Pre-commit hooks will automatically run on commit. To manually format: +```bash +# Format all Python files +uv run black . + +# Sort imports +uv run isort . + +# Run all pre-commit hooks manually +uv run pre-commit run --all-files +``` + +#### Code Quality Best Practices + +- Write clear, self-documenting code +- Add docstrings to classes and functions +- Keep functions focused and modular +- Follow existing patterns in the codebase +- Handle errors gracefully with appropriate logging +- Avoid hardcoded values; use configuration where appropriate + +### Commit Messages + +Write clear, descriptive commit messages: + +- First line should be 50 characters or less +- Provide additional context in the body if needed +- Reference issue numbers when applicable + +Example: +``` +Add support for custom fault injection parameters + +- Extend VirtualizationFaultInjector to accept custom config +- Add validation for parameter types +- Update documentation with examples + +Fixes #123 +``` + +## Adding New Components + +### Adding New Applications + +To add a new application to SREGym: + +1. **Create application metadata** in `SREGym/service/metadata/.json`: + ```json + { + "name": "My Application", + "description": "Description of the application", + "namespace": "my-app", + "Helm Config": { + "release_name": "my-app-release", + "chart_path": "path/to/helm/chart", + "namespace": "my-app" + } + } + ``` + +2. **Create application class** in `sregym/service/apps/.py`: + ```python + from sregym.service.apps.base import Application + + class MyApp(Application): + def __init__(self): + super().__init__("sregym/service/metadata/.json") + ``` + +3. **Update documentation** if needed + +### Adding New Problems + +To add a new problem: + +1. 
**Create problem file** in `sregym/conductor/problems/.py`: + ```python + from sregym.service.apps.myapp import MyApp + from sregym.conductor.oracles.detection import DetectionOracle + from sregym.conductor.oracles.diagnosis import DiagnosisOracle + from sregym.conductor.oracles.mitigation import MitigationOracle + from sregym.conductor.problems.base import Problem + from sregym.utils.decorators import mark_fault_injected + + class MyProblem(Problem): + def __init__(self): + self.app = MyApp() + self.faulty_service = ["service-name"] + + # Attach evaluation oracles + self.diagnosis_oracle = DiagnosisOracle( + problem=self, + expected=self.faulty_service + ) + self.mitigation_oracle = MitigationOracle(problem=self) + + @mark_fault_injected + def inject_fault(self): + # Fault injection logic + pass + + @mark_fault_injected + def recover_fault(self): + # Fault recovery logic + pass + ``` + +2. **Register your problem** in `sregym/conductor/problems/registry.py` + +3. **(Optional) Configure tasks** in `sregym/conductor/tasklist.yml`: + ```yaml + my-problem-id: + - detection + - diagnosis + - mitigation + ``` + +4. **Document the problem** with clear description and expected behavior + +### Adding New Oracles + +Custom oracles allow for specialized evaluation: + +1. Create your oracle in `SREGym/conductor/oracles/.py` +2. Inherit from appropriate base oracle class +3. Implement evaluation logic +4. Document the oracle's purpose and usage + +## Pull Request Process + +1. **Create a feature branch** from `main`: + ```bash + git checkout -b feature/your-feature-name + ``` + +2. **Make your changes** following the development guidelines + +3. **Commit your changes** with clear commit messages + +4. **Push to your fork**: + ```bash + git push origin feature/your-feature-name + ``` + +5. **Create a Pull Request** on GitHub: + - Provide a clear title and description + - Reference any related issues + - Explain what changes you made and why + - Describe how you tested the changes + - Include screenshots or examples if applicable + +6. **Respond to feedback**: + - Address review comments promptly + - Update your PR as needed + - Keep discussions constructive and professional + +7. **Wait for approval**: + - At least one maintainer approval is required + - All CI checks must pass + - No merge conflicts with main branch + +### PR Checklist + +Before submitting, ensure: + +- [ ] Code follows the project's style guidelines +- [ ] Documentation updated if needed +- [ ] Pre-commit hooks pass +- [ ] No unnecessary files or changes included +- [ ] Commit messages are clear and descriptive +- [ ] PR description explains the changes + +## Community + +### Getting Help + +- **Slack**: Join our [Slack community](https://join.slack.com/t/SREGym/shared_invite/zt-3gvqxpkpc-RvCUcyBEMvzvXaQS9KtS_w) for discussions +- **Issues**: Use GitHub issues for bug reports and feature requests +- **Documentation**: Check the [README](README.md) and existing documentation + +### Staying Updated + +- Watch the repository for updates +- Follow discussions on Slack +- Review the problem list for new additions + +## License + +By contributing to SREGym, you agree that your contributions will be licensed under the [MIT License](LICENSE.txt). 
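Step 2 of *Adding New Problems* above ("Register your problem") has no accompanying snippet. The actual registration mechanism lives in `sregym/conductor/problems/registry.py` and is not reproduced in this patch, so the sketch below only illustrates one common pattern (a module-level mapping from problem IDs to problem classes); the module name `my_problem` and the `PROBLEM_REGISTRY`/`get_problem` names are hypothetical, so follow whatever `registry.py` actually does.

```python
# Hypothetical sketch of a problem registry; the real API in
# sregym/conductor/problems/registry.py may look different.
from sregym.conductor.problems.my_problem import MyProblem  # module name assumed


# Map the problem ID used in tasklist.yml to the problem class.
PROBLEM_REGISTRY = {
    "my-problem-id": MyProblem,
}


def get_problem(problem_id: str):
    """Instantiate a registered problem by its ID."""
    return PROBLEM_REGISTRY[problem_id]()
```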
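Similarly, the *Adding New Oracles* steps above come without an example, so here is a minimal sketch of what a custom oracle might look like. It is illustrative only: the base-class name (`Oracle` in `sregym/conductor/oracles/base.py`), the `evaluate()` method name and return shape, and the `problem.app.namespace` attribute are assumptions rather than the project's confirmed interface; read `base.py` and an existing oracle such as `detection.py` before writing your own.

```python
# Hypothetical example: sregym/conductor/oracles/pod_ready_oracle.py
# NOTE: the base class and method names below are assumptions for illustration.
import json
import subprocess

from sregym.conductor.oracles.base import Oracle  # assumed base-class name


class PodReadyOracle(Oracle):
    """Passes only when every pod in the problem's namespace reports Ready."""

    def __init__(self, problem):
        self.problem = problem

    def evaluate(self) -> dict:
        # Namespace attribute mirrors the Problem/Application examples above (assumed).
        namespace = self.problem.app.namespace
        out = subprocess.run(
            ["kubectl", "get", "pods", "-n", namespace, "-o", "json"],
            capture_output=True, text=True, check=True,
        ).stdout
        not_ready = []
        for pod in json.loads(out).get("items", []):
            conditions = pod.get("status", {}).get("conditions", [])
            ready = any(c.get("type") == "Ready" and c.get("status") == "True" for c in conditions)
            if not ready:
                not_ready.append(pod["metadata"]["name"])
        return {"success": not not_ready, "details": {"not_ready_pods": not_ready}}
```

A problem would then attach such an oracle the same way the example above attaches `DiagnosisOracle` and `MitigationOracle`, e.g. `self.mitigation_oracle = PodReadyOracle(problem=self)`.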
+ +--- diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..6e77f58 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,19 @@ +MIT License + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/Problem List.md b/Problem List.md new file mode 100644 index 0000000..c52791c --- /dev/null +++ b/Problem List.md @@ -0,0 +1,89 @@ + +| Problem ID | Type | Origin | Failure to Simulate | Fault Level | Failure Level | +| --------------------------------------------------- | ----------------------------------- | -------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------ | ------------- | +| faulty_image_correlated | Correlated Failure | New | All the image in containers are faulty, causing error | Container | App | +| update_incompatible_correlated | Correlated Failure | New | The image in all the `mongodb` containers are updated to an incompatible version, causing error | Container | App | +| kubelet_crash | Correlated Failure | New | The kublet process on worker node crashes, making all the services on the nodes unavailable | Cluster Management | App | +| incorrect_image | System/Application Software Failure | New | Container of `product-catalog` service pulled incorrect image | Container | App | +| incorrect_port_assignment | Misconfiguration Failure | New | `PRODUCT_CATALOG_ADDR` port of the `checkout` service is misconfigured | Container | App | +| misconfig_app_hotel_res | System/Application Software Failure | AIOpsLab | Container of `geo` service pulled incorrect image | Container | App | +| missing_env_variable_astronomy_shop | Misconfiguration Failure | New | `CART_ADDR` environment variable in `frontend` containers are missed | Container | App | +| revoke_auth_mongodb-1 | Security Failure | AIOpsLab | Admin privileges in `mongodb-geo` are revoked | App | App | +| revoke_auth_mongodb-2 | Security Failure | AIOpsLab | Admin privileges in `mongodb-rate` are revoked | App | App | +| storage_user_unregistered-1 | Security Failure | AIOpsLab | User is not registered to `mongodb-geo` | App | App | +| storage_user_unregistered-2 | Security Failure | AIOpsLab | User is not registered to `mongodb-rate` | App | App | +| valkey_auth_disruption | Security Failure | New | The password in valkey is invalidated, therefore dependent services cannot work | App | App | +| 
valkey_memory_disruption | Database Failure | New | The valkey store is in OOM state | App | App | +| capacity_decrease_rpc_retry_storm | Metastable Failure | New | RPC module is misconfigured, then a CPU containment will trigger a retry storm | Hardware/App | App | +| gc_capacity_degradation | Metastable Failure | New | GC frequency is set to be high, then a load spike will trigger lot requests stacked, leading to more GC activity, forming the sustaining loop | OS/App | App | +| load_spike_rpc_retry_storm | Metastable Failure | New | RPC module is misconfigured, then a load spike will trigger a retry storm | App | App | +| assign_to_non_existent_node | Cloud Management System Failure | AIOpsLab | `user-service` service is assigned to a non-ready node | Cluster Management | App | +| auth_miss_mongodb | Security Failure | AIOpsLab | `mongodb` requires TLS certificate, while the client fails to provide | App | App | +| configmap_drift_hotel_reservation | Misconfiguration Failure | New | The `GeoMongoAddress` configuration misses in `geo` service, making it fail to connect to `mongodb` | App | App | +| duplicate_pvc_mounts_astronomy_shop | Cloud Management System Failure | New | Multiple pods in `mongodb-rate` service tries to mount same `ReadWriteOnce` pvc | Cluster Management | App | +| duplicate_pvc_mounts_hotel_reservation | Cloud Management System Failure | New | Multiple pods in `mongodb-rate` service tries to mount same `ReadWriteOnce` pvc | Cluster Management | App | +| duplicate_pvc_mounts_social_network | Cloud Management System Failure | New | Multiple pods in `mongodb-rate` service tries to mount same `ReadWriteOnce` pvc | Cluster Management | App | +| env_variable_shadowing_astronomy_shop | Misconfiguration Failure | New | `FRONTEND_HOST` environment variable is incorrectly set to `localhost` | Container | App | +| k8s_target_port-misconfig | Misconfiguration Failure | AIOpsLab | Target port in `user-service` is misconfigured | Container | App | +| liveness_probe_misconfiguration_astronomy_shop | Misconfiguration Failure | New | Misconfigured `healthz` port, making liveness probe fail, therefore pods of `frontend` service will be caught into continuously restart cycle | Cluster Management | App | +| liveness_probe_misconfiguration_hotel_reservation | Misconfiguration Failure | New | Misconfigured `healthz` port, making liveness probe fail, therefore pods of `recommendation` service will be caught into continuously restart cycle | Cluster Management | App | +| liveness_probe_misconfiguration_social_network | Misconfiguration Failure | New | Misconfigured `healthz` port, making liveness probe fail, therefore pods of `user-service` service will be caught into continuously restart cycle | Cluster Management | App | +| liveness_probe_too_aggressive_astronomy_shop | Cloud Management System Failure | New | Pods of `aux-service` service will be caught into continuously restart cycle | Cluster Management | App | +| liveness_probe_too_aggressive_hotel_reservation | Cloud Management System Failure | New | Pods of `aux-service` service will be caught into continuously restart cycle | Cluster Management | App | +| liveness_probe_too_aggressive_social_network | Cloud Management System Failure | New | Pods of `aux-service` service will be caught into continuously restart cycle | Cluster Management | App | +| missing_configmap_hotel_reservation | Cloud Management System Failure | New | `mongo-geo-script` configmap is missed in `mongodb-geo` service | App | App | +| 
missing_configmap_social_network | Cloud Management System Failure | New | `media-mongodb` configmap is missed in `media-mongodb` service | App | App | +| missing_service_astronomy_shop | Cloud Management System Failure | New | `ad` service is missed in `astronomy_shop` app | App | App | +| missing_service_hotel_reservation | Cloud Management System Failure | New | `mongodb-rate` service is missed in `hotel_reservation` app | App | App | +| missing_service_social_network | Cloud Management System Failure | New | `user-service` service is missed in `social_network` app | App | App | +| namespace_memory_limit | Cloud Management System Failure | New | Pods in `search` service will get stuck in pending as they can't be scheduled due to memory limit | OS | App | +| pod_anti_affinity_deadlock | Cloud Management System Failure | New | Pods in `user-service` service will get stuck in pending as they can't be scheduled due to affinity rule | Cluster Management | App | +| persistent_volume_affinity_violation | Cloud Management System Failure | New | Pods in `user-service` service will get stuck in pending as they can't be scheduled because they can't mount the pvc | Cluster Management | App | +| pvc_claim_mismatch | Cloud Management System Failure | New | Pods in `mongodb` will get stuck in pending as they can't be scheduled because they can't mount the pvc | Cluster Management | App | +| rbac_misconfiguration | Misconfiguration Failure | New | Init containers unable to perform required operations due to insufficient permissions. | Cluster Management | App | +| readiness_probe_misconfiguration_astronomy_shop | Misconfiguration Failure | New | Pods in `frontend` will never enter `ready` state, making service unable to deal with requests | Cluster Management | App | +| readiness_probe_misconfiguration_hotel_reservation | Misconfiguration Failure | New | Pods in `frontend` will never enter `ready` state, making service unable to deal with requests | Cluster Management | App | +| readiness_probe_misconfiguration_social_network | Misconfiguration Failure | New | Pods in `user-service` will never enter `ready` state, making service unable to deal with requests | Cluster Management | App | +| resource_request_too_large | Cloud Management System Failure | New | Pods in `mongodb-rare` will never be scheduled because the container requires memory that excess limit in every node | Container | App | +| resource_request_too_small | Cloud Management System Failure | New | Pods in `mongodb-rare` will never be scheduled because the container requires memory that excess limit in every node | Container | App | +| rolling_update_misconfigured_hotel_reservation | Misconfiguration Failure | New | No pods will be available since they get stuck with a contracdictory update configuration | Cluster Management | App | +| rolling_update_misconfigured_social_network | Misconfiguration Failure | New | No pods will be available since they get stuck with a contracdictory update configuration | Container | App | +| scale_pod_zero_social_net | Cloud Management System Failure | AIOpsLab | The replicas will be set to `0`, leading to no available pods, thus making service unavailable | Cluster Management | App | +| service_dns_resolution_failure_astronomy_shop | Network Failure | New | Modify the configuration of CoreDNS, making DNS resolution to `frontend` service fail | Cluster Management | App | +| service_dns_resolution_failure_social_network | Network Failure | New | Modify the configuration of CoreDNS, making DNS resolution to 
`user-service` service fail | Cluster Management | App | +| sidecar_port_conflict_astronomy_shop | Misconfiguration Failure | New | The pods fails to start because a malicious sidecar container will compete for the port with main container | Container | App | +| sidecar_port_conflict_hotel_reservation | Misconfiguration Failure | New | The pods fails to start because a malicious sidecar container will compete for the port with main container | Container | App | +| sidecar_port_conflict_social_network | Misconfiguration Failure | New | The pods fails to start because a malicious sidecar container will compete for the port with main container | Container | App | +| stale_coredns_config_astronomy_shop | Network Failure | New | All communication within cluster will be interrupted, because DNS resolution to all services will fail | Cluster Management | App | +| stale_coredns_config_social_network | Network Failure | New | All communication within cluster will be interrupted, because DNS resolution to all services will fail | Cluster Management | App | +| taint_no_toleration_social_network | Cloud Management System Failure | New | Target pod can't be scheduled because there are no available nodes tolerate the pod | Cluster Management | App | +| wrong_bin_usage | System/Application Software Failure | AIOpsLab | Pod will start with wrong bin file | Container | App | +| wrong_dns_policy_astronomy_shop | Network Failure | New | All communication within cluster will be interrupted due to wrong DNS resolution policy in cluster | Cluster Management | App | +| wrong_dns_policy_hotel_reservation | Network Failure | New | All communication within cluster will be interrupted due to wrong DNS resolution policy in clusterl | Cluster Management | App | +| wrong_dns_policy_social_network | Network Failure | New | All communication within cluster will be interrupted due to wrong DNS resolution policy in cluster | Cluster Management | App | +| wrong_service_selector_astronomy_shop | Misconfiguration Failure | New | No available pods in the service due to wrong selector policy | Cluster Management | App | +| wrong_service_selector_hotel_reservation | Misconfiguration Failure | New | No available pods in the service due to wrong selector policy | Cluster Management | App | +| wrong_service_selector_social_network | Misconfiguration Failure | New | No available pods in the service due to wrong selector policy | Cluster Management | App | +| astronomy_shop_ad_service_failure | System/Application Software Failure | AIOpsLab | `Ad` service will fail | App | App | +| astronomy_shop_ad_service_high_cpu | System/Application Software Failure | AIOpsLab | Triggers high cpu load in the `Ad` service | Hardware | App | +| astronomy_shop_ad_service_manual_gc | System/Application Software Failure | AIOpsLab | Triggers full manual garbage collections in the `Ad` service | OS | App | +| astronomy_shop_cart_service_failure | System/Application Software Failure | AIOpsLab | Fail `cart` service | App | App | +| astronomy_shop_ad_service_image_slow_load | System/Application Software Failure | AIOpsLab | Slow loading images in the `frontend` | Container | App | +| astronomy_shop_payment_service_failure | System/Application Software Failure | AIOpsLab | Fail `payment` service charge requests n% | App | App | +| astronomy_shop_payment_service_unreachable | System/Application Software Failure | AIOpsLab | `payment` service is unavailable | App | App | +| astronomy_shop_product_catalog_service_failure | System/Application Software Failure 
| AIOpsLab | Fail `product_catalog` service on a specific product | App | App | +| astronomy_shop_recommendation_service_cache_failure | System/Application Software Failure | AIOpsLab | Fail `recommendation` service cache | App | App | +| kafka_queue_problems | System/Application Software Failure | AIOpsLab | Overloads Kafka queue while simultaneously introducing a consumer side delay leading to a lag spike | App | App | +| loadgenerator_flood_homepage | System/Application Software Failure | AIOpsLab | Flood the frontend with a large amount of requests | App | App | +| trainticket_f17_nested_sql_select_clause_error | System/Application Software Failure | New | Too many nested 'select' and 'from' clauses are in the constructed SQL statement | App | App | +| trainticket_f22_sql_column_name_mismatch_error | System/Application Software Failure | New | The constructed SQL statement includes a wrong column name in the 'select' part according to its 'from' part | App | App | +| read_error | Hardware Component Failure | New | Pods on the node will encounter read error when visiting storage | Hardware | App | +| ingress_misroute | Network Failure | New | By modifying Kubernetes Ingress rules to route traffic from specific paths to incorrect backend services, simulates request misrouting issues caused by network routing configuration errors or load balancing misconfigurations. | Cluster Management | App | +| network_policy_block | Network Failure | New | Service isolation issues caused by network partitioning, firewall configuration errors, or overly strict security policies | Cluster Management | App | +| social_net_hotel_res_astro_shop_concurrent_failures | Multiple Independent Failures | New | / | / | / | +| workload_imbalance | System/Application Software Failure | New | By modifying kube-proxy, the workload won't be distributed to pods evenly and balancedly | Cluster Management | App | +| operator_overload_replicas | Cloud Management System Failure | New | Replicas number of TiDB is set to be a huge number, making a lot of pods stuck | Cluster Management | App | +| operator_non_existent_storage | Cloud Management System Failure | New | The name of storageclass is set to be an invalid value, making pvc stuck at pending | Cluster Management | App | +| operator_invalid_affinity_toleration | Misconfiguration Failure | New | An invalid `toleration effect` field is set, making pods unable to schedule | Cluster Management | App | +| operator_security_context_fault | Security Failure | New | `runAsUser` is set to be an invalid value, making pods crash | Cluster Management | App | +| operator_wrong_update_strategy_fault | Misconfiguration Failure | New | `statefulSetUpdateStrategy` is set to be an invalid value, making pods unable to restart and cluster unable to rollout update | Cluster Management | App | diff --git a/README.md b/README.md new file mode 100644 index 0000000..8690bac --- /dev/null +++ b/README.md @@ -0,0 +1,93 @@ +
# SREGym: A Benchmarking Platform for SRE Agents
+ +[🔍Overview](#🤖overview) | +[📦Installation](#📦installation) | +[🚀Quick Start](#🚀quickstart) | +[⚙️Usage](#⚙️usage) | +[🤝Contributing](./CONTRIBUTING.md) | +[📖Docs](https://sregym.com/docs) | +[![Slack](https://img.shields.io/badge/-Slack-4A154B?style=flat-square&logo=slack&logoColor=white)](https://join.slack.com/t/SREGym/shared_invite/zt-3gvqxpkpc-RvCUcyBEMvzvXaQS9KtS_w) +

## 🔍 Overview
+SREGym is an AI-native platform to enable the design, development, and evaluation of AI agents for Site Reliability Engineering (SRE). The core idea is to create live system environments for SRE agents to solve real-world SRE problems. SREGym provides a comprehensive SRE benchmark suite with a wide variety of problems for evaluating SRE agents and also for training next-generation AI agents. +

![SREGym Overview](/assets/SREGymFigure.png)

SREGym is inspired by our prior work on AIOpsLab and ITBench. It is architected with AI-native usability and extensibility as first-class principles. The SREGym benchmark suite contains 86 different SRE problems. It supports all the problems from AIOpsLab and ITBench, and includes new problems such as OS-level faults, metastable failures, and concurrent failures. See our [problem set](https://sregym.com/problems) for a complete list of problems.

## 📦 Installation
+ +### Requirements +- Python >= 3.12 +- [Helm](https://helm.sh/) +- [brew](https://docs.brew.sh/Homebrew-and-Python) +- [kubectl](https://kubernetes.io/docs/tasks/tools/) +- [uv](https://github.com/astral-sh/uv) +- [kind](https://kind.sigs.k8s.io/) (if running locally) + +### Recommendations +- [MCP Inspector](https://modelcontextprotocol.io/docs/tools/inspector) to test MCP tools. +- [k9s](https://k9scli.io/) to observe the cluster. + +```bash +git clone --recurse-submodules https://github.com/SREGym/SREGym +cd SREGym +uv sync +uv run pre-commit install +``` + +
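Before moving on, it can be worth a quick sanity check that the tools listed above are actually on your `PATH`. The commands below are standard version checks for each tool; the expected Python version comes from the requirements list, and nothing here is specific to SREGym.

```bash
# Optional: confirm the prerequisites are installed and visible on PATH
python3 --version        # expect 3.12 or newer
uv --version
helm version --short
kubectl version --client
kind version             # only needed if you plan to run the emulated (kind) cluster
```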

## 🚀 Quickstart

## Set up your cluster
Choose either a) or b) to set up your cluster and then proceed to the next steps.

### a) Kubernetes Cluster (Recommended)
SREGym supports any Kubernetes cluster that your `kubectl` context points to, whether it's a cluster from a cloud provider or one you build yourself.

We provide an Ansible playbook to set up clusters on providers like [CloudLab](https://www.cloudlab.us/) as well as on our own machines. Follow this [README](./scripts/ansible/README.md) to set up your own cluster.

### b) Emulated cluster
SREGym can also run on an emulated cluster using [kind](https://kind.sigs.k8s.io/) on your local machine. However, not all problems are supported in this mode.

```bash
# For x86 machines
kind create cluster --config kind/kind-config-x86.yaml

# For ARM machines
kind create cluster --config kind/kind-config-arm.yaml
```
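Once the kind cluster is up, it can help to confirm that `kubectl` is actually pointing at it before running any problems. The `kind-kind` context name below assumes kind's default cluster name; if the config files in `kind/` name the cluster differently, the context will be `kind-<that-name>` instead.

```bash
# Optional: verify the kind cluster is reachable and is the active kubectl context
kubectl config get-contexts            # look for the kind-* entry
kubectl config use-context kind-kind   # assumes the default cluster name "kind"
kubectl get nodes                      # nodes should report Ready once the cluster settles
```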

## ⚙️ Usage
+ +### Running an Agent + +#### Quick Start + +To get started with the included Stratus agent: + +1. Create your `.env` file: +```bash +mv .env.example .env +``` + +2. Open the `.env` file and configure your model and API key. + +3. Run the benchmark: +```bash +python main.py +``` + +### Monitoring with Dashboard + +SREGym provides a dashboard to monitor the status of your evaluation. The dashboard runs automatically when you start the benchmark with `python main.py` and can be accessed at `http://localhost:11451` in your web browser. + +## Acknowledgements +This project is generously supported by a Slingshot grant from the [Laude Institute](https://www.laude.org/). + +## License +Licensed under the [MIT](LICENSE.txt) license. diff --git a/SREGym-applications b/SREGym-applications new file mode 160000 index 0000000..00bdcab --- /dev/null +++ b/SREGym-applications @@ -0,0 +1 @@ +Subproject commit 00bdcab9caecf7ada6b264a41b35e238d7fe085d diff --git a/agents.yaml b/agents.yaml new file mode 100644 index 0000000..4bdd6de --- /dev/null +++ b/agents.yaml @@ -0,0 +1,11 @@ +agents: + - name: stratus + kickoff_command: + python -m clients.stratus.stratus_agent.driver.driver --server + http://localhost:8000 + kickoff_workdir: . + kickoff_env: null + # - name: autosubmit + # kickoff_command: python -m clients.autosubmit.autosubmit_agent http://localhost:8000 + # kickoff_workdir: . + # kickoff_env: null diff --git a/assets/SREGymFigure.png b/assets/SREGymFigure.png new file mode 100644 index 0000000000000000000000000000000000000000..a1915e69efc18e0b8668528b868cd7ada318cc26 GIT binary patch literal 330255 zcmeFZWmsHWwl$0fcMER8H8{a7gy2r$Zb5^)1QOhWYjC&1-6as*T@r%3`@2cb>C=7h zJ^kJOeqWvnwX><(vesB*jydM63RaMlL_s7#gn)oRk$NMp1Ob6y3;_Xc4F3$+^GdQ( z1_A4{)0Tb-AH84M{?!d^h^Hx$!xGEv;~s{E2w61r=sz8nD2xW}-Pa+t#R zc)$5gXfu`1;gA?&OUJX218)k3Og@Gs6y?H9R!+_!+8qLN3j)y&=D^cWdmuOh6|Mrg z&WEME98!Ev*IB{xzUr|JmnriNGXzBNM;;woydNTWln}1P!t{f1zO;eO*Cn2XnB^Zd zB%cun)V~g9SFKkKo>0A7|03(jh_@~Y5jAz~38ID|@QO0SfoID^C4P;R`a-l4#!x88 z>*T}~o?I8E5~bSDVLa|;@2uRB?J+M&Fzf54S%khSZvUq8SYlnvs$GgLrgyQGv zT?A_9jG#9qY^I5IlDzDlu^PndbunlCv_UnH!uFMRqq@OKppYn-H~~w_CDHa1pctY4{1_sO zNWkkxiDc^k6snvXnegT``@5?5dfeRIsCk^wl^{B5JsCuWFVwUnE3`4Ry`-%&OSAGh z45VO`p9~oks@S)qpqvg8gyvp9(IY-_3NljsO`%x^DA=e+ju6%N>lkuQP7wHFUT@=w zj-Idcqd$AU)?%n2#kMX&;R=I)EX2RwMeqCaM`zLA zFl0BnA^I3Dce|Gz3qO(Wy}ncLTz#J%k=`CcBtlvMTtIOEbLY_ZI>j6<3#BmLBpShh z{LRdDN9mV5hJ#2CF{gqsc6;bm-==mqo_dzlIUMjJd0QOFAbk8m3r0^MlbkOE3pO__ z^Gea`b>w8lgdMS>;?JAiy8=&H(y!buIc8^fi{)UJ0P9(E={>DIGUsP}era8rmDKH> z5e7pVe9q#*l4^_D9XQiG;$)K{6BmZ}4(;O(PQUZ?)@!wOEo+(V$&2_eLC-?L}Wyl*-KNuc%H z_&lkI9y;eL*77A#e5*deB7*!vy5jJlJRL^Y4kqAw+lgk|T~qxCkD$EC&;$Hw(a~Z$ zIJUfSwKv&Vk-kBPiXhNJ7q`1BAk_Nre|feC2`7wY4olNMvdUi%U*^AHckcLM^9x$@ zGg5RIa&nFUzb#7YAS%O19!jfjI0W&|XpI;6Qetcv2wB2E$)%&%j8T+=t;8&&@B0mY zz`RG@7Pq0ej3)UZ|3e@obEg2+GC(u)yCTCj&DsZn-S&VcvST0a$){#>K z`Py-a{ApI9JgHkD^di*PIbA8e5!=F6`fiU#Uf6}#k`ez5hM6D{fBG_#j(_N4(9ml zahvg%<5WuA+f1NvD^X`^xM<~m%=Z`#soIi)Z(GH^MIVI6ax|3q=@hZaO;Am!OyF_@ z-}{ZrUJFlUX)3oA9#2~B7+S*C__-&$f4X@qD?21JB*yewwxsq=si<|vJgDZ@@^G?d zYJaj}r*H@H9A~F#GO~C@#YZB0ti;^A8e3D)8n!gvImS8iLhVE5T0zGIx+UXe!MOQk zea^AJT&C7I@0rD!s5|dH**#Q?`sXZ*+^91jaUcuLpv-!ELXlVYUxGnjgJsuy@)5bG&g(x zj%UWM$)L%gExJ;%B6`w1drT@@*C?+t!@lv)r<}T>81gUhl^l*06%G26(US&bMl4>4o?O?^dpj$1r zue}f6E81h8&Aza?FuWbQNc#@|oxfMAS2eQf;LAa~J*{1VOZ{QLBeDzQd#m@JFP9BH z2d<{RkAyw${n&}!t?t^sfZ0dL#0sIVmbsdk8wRavZ+MzeSR0OZ2V$krv^rM0U^H0S zH>?fVh1==u%>TUkInZEC+SVLG3pog7uE@2~ z5NXMAnQ?Chwta5<_89d@SVrR_c1uOy zj4t{{qkkuAm!C4-SAB37$q6lu)SX^NfRVX`ZBL|Lr=GEy3Mw2vT!YY_W}Hn4a}FCe 
zh%cM#=P$+a!E61)vqQy0L+*`JOmCu87==aU{rQWB<|=koOGJ#KznGgv=Kz#9R^Pv^ zAvf=QA0*iu9oGM@vMj&1LQ<&}IU?$0lz?BkKf$LEw|HdI4tOwBPd6=hVIzGGu9VtE zB`}G5?cz2$;|DF=Q1SBRT2ns_^HHHtU)T7%`RNrrvu|0NuLJNEIA}UKT(9V*aYsK| z&ef41b)CL+)!t%qfk!7-ktnmMbux1Z=YJ;CxXUDLm}b0i{Q*9PI-VcSckSE9db&yR zF;H@*G5EDyI?@Ys67!^*-P#P_nyiK8v-qnLrQ+M;UTI6araW!dnEBKqR*jLyO#`~7 zsW-8tclpSvZPb_9Wm@2@(BQQOk*eK=EgH1i?n;T<(&*OeVf#^=>uGzLcytH3*#}kmh`(!}m9rx2CfxQeQk^pqj9N$E*X{j9IO_LCCl4NS-`g-wRHER3mNt3n z)4PuC2VL+6u|!tH_yOW>$Cl783dAQzBiBlhh%8=y`12x)%8uc@K$6Qgx<8p+nzcyt z>{~Fc#Jpl`5-yM5ZfB&p67$lwgL^Zzi`Yg6;g*p)cKui;`z!Vj6-_q zn3*B~V2)Gfs7#j98sW6q5SBjlb#sJj>4&ug71XP)}o4sA6^RbUqllAp{f zuCPe+xPSh9!!F=|wv*YjM)%!9oovc>B||rvc8KvMFD$foHQgk)=G~cNfivf&iQ915 z2Neb8`daboaNx&FO%07b zS;tv^Ux~=nOZ;6ScAIwJHZ<@0N-VDko7Df3OsF2K-S|)$d`=sX_g#{#;et{g1r9M*qVa zpT?5vqha5&?2WcmUQJJ!_09%vJBle)>Vk2ns3+1JLmqVbvBZ=Efh;GODfe+>fju?J z_Pz+VvFsaU{L^+1xQ`n-?h#;0>rYSO0kjk(Y@pH3AV8}1VMy3G=37ji|ClTCYdr43 zu;pwDZ>5!;v*1Pa&16l4Lh-^Nbv`rm_GAdQ!+L6OOLg!EO>O#M5I!h6IUG0HLiT2q z9l3w)v zYsB&8>a&*EMm7ry!#)%$YJVs%BII88=3T5ih}AFWa*GiXKY~SNWkgwP#M|M9JkX zR9E&*Y8M@s@Inz<8|{|%8hx53Imds7trFX%Vp7yL=wNVX*eQgc0ACX3 zUwx@sOhL!LZ&<$ zINeUVa)D@!clnLFlcQqQwvlk^fW2F)19<18uBeUg-Xc(6zmHt@8ZN_J$iKvQ|AfDO zl~Bz8()g%bxZLK#Khoi)a=MU+1+qkG!*|`&Ke4;9Ev!rNi87`MoZZN}Hcj zvxGyq`bS1g==FKL`S#kZ@Xfg^?=`(~4NzeC2uiVRPR_l&4CQ2$?}At6u9FVLS8zeN zo0#&~Sjv9xe_PjgTu!yDgS|qfK8rq_8!4%zC;^Y~m4dQ6-KruO&s=42De&c()NUQ( zM{RrO3bXBoagN2yjU6)e;l5GkLSj3sGL_MbSfbB!6iIQ3wND)up3l1<&>hI)T?2nL zcg=45WD4$(662CbwUvX;ssYVM#xmkt>v;2Rp%d4RwpoMIz86T0m!a26HIB0%S)7)R z%{AJ}m*4`<;%=hEUtH{E;d_B)Qu<_ z-{%gGnlGcxG0uySEoj$YelC{9y9o}({~e;BaVy)nelhEIFsoMMu|v7ii6vU%d{oWT zv6?-54lka%6?GdHu|*XoQ3ASfW0Sv1|mX&Q%aLS(ty^UtmxTw76SOyTqn*}d6rn2 z2cr>k5a?nc~;8=;K5U0>S{x!o_@8)H-2&V(VD=R+_grQDRLdf&R@$ z^0)nO@`k@*&CML^iuY!ySfADgmV0htuzbm_QyD-^i9_OADthT6MuAZJX20C}9O}aG zM759w#;Yq&FleE-qSdwy(GSzdqW?sh0 zZadBGY19TaB&0O@7585J5TssZfQ<^~tN$S^iz+s^ai~DEhn2A0*>p zr=6;`ux*$5r9cVQxof{LT$HRb(j^EJMcQ`+;R=#2vE@%lX>#981?8$N*i!g z?@g6V#D0Yjdh+2Oy*+%xW%t)48vD+7Rx@3#bqa#le7)}x@E@NqTSJJ@#)90v+Q^?? zFhu5XgA5+(LzWx4!hW>?~0=|=y9W%?uA=;Ql;m*`{ zTk~~M1*ZRc&Tt9^i)SgF;Xv$L%lX40illBYc6V9w%O%?(^+R*=l`Hl*)(c*{>!4zP zBry58FmN5|p-w;J`h9n6lFRzIr6YKD!iCJsBP94M&&lZ}t(7agbR*AK`!X_D`9h{Q*0!Xe(oPf!qnJ??-(JZD5%YP13qPIdqQ6yv#0w(W*g40pwP=`ww4MnG`q# zItZPG@VX+0afiF+njpI9y_BEh+rjKUsWi8UvIx4n6*7cpEcyELjqWeqZ+5BdwpM9Y z(75_mY1%2M?Ftra)u8J_Bj?&22^nw1aD>&Bgp! 
zoG9Ui{h>XR=eDQD(s^=u7THRbOBV0gyK#}KPe}(b6SvM2y~&xXJ{O;o1RhI9j#EGH zJX3W>ZnRQcyO_PoT1YmoPNaNBkn-AH>($=g4Y(4{S2y^7f=?pmCcEIz+bK>Yw%owi zpBf#HU5-J&L)Zl+gH<>qkPw!fP+CYHt=;eOLzIjeWCVC`LgGyriRT+&x;kur_cvqq zl(fT_UeefirL$!>^dmi!j zuhW^mnqnA59&gXr&yZtVV?ey+9>(!w(A8_$+VJaktvxsG6VT@uDXhfHv5sk|DKF9v z%-s7?F9K8bZdm%cnNUqHR_@V}m3+TIyI^;Vhcee*y5m#KO##aFMj`2%vrKFnj+|1q z6Ytn6F}1ab{&I&QB)dzaZy!X*`0!-ICU`ThNP5hF%-48?%zLCo8ENR~94b-d+Wd-E z%H_8;T`Pq08p^tsIf%qT4P_^qAklW#?^+!j9>|tB8f~PCPKeP=1SNX&$|&Z=;7N~a zq;l@;JNZn_MoeX0b$r%cYy1(B&+G5o&&p^P2 z_F(yBM!Z%h-xfPIQGIYQM*@90bFC^*7aLzb4R5}#MDJsZg`~4PJu1$@pV)?*ESwfR?oPaYFWC7MJE_n7cb;A>ZxNX89=`}R2Cvd|*(0dNuZPyP z8Ikk+?8~@Q0GffX{;t^xk;Vy!{laX$ZTyd86KO}wx$#Xcfj67S8z$?Kdx6&<+BerG z=XZ{evY&&eA{$GoD>`n6#b}-dcXkxTqW9|-W5l~l-~JY3=v|JCP9{ved_K=Wu8^8c zNUqFtsmGkAXLUMTQq6pNv=6gaL7P|OSnRIJ3}Qt61nxPpYGm}% zelIqejIc8YB8AD?8=Kz^{cO!9U~=2AeDguXK*WP1}8O2X=WBZOVrs;rKf)8GSHht46M4`A{ooCYMi_2sv}nITGvWdT%?@|p z0Iw&E?+l#D%&l{tTyd@?ip*KOF{T}k5m8I-<7Zlmb;nn$70Qt&9lLdK<@}bD3R(o8 z+C=K=>a2gZ`|Hs`^&qoUcH1~#GwZ#Id2U&2RMDqOru> z1TM4Z1+@$8L|ISQ9y3241~M{j?e=5N?d1hobwMJDaP*>ToJs%p<6EQdwu_?-ar7x~ zbtaT<6j;pU{fW|*br+AnAGkeR$#E%rif?lAt^Cs5Q~P!Hk5N^xXZ~Lt1`sOypI{q| z5&j%?^CFad^xFFm^F}BI?c`0RRX1k@G@3DlhxfoG2fX0S78D1t+(IwHbz>!N1Ofhe z(vAS8Ow#D%ueB!frp=OXCyyq=w8cf8r6>B#S%_s1P)CnW0^>RcIt_15PmaSw#ia1Q z@bJ&JJNVYvoj!GBEqppvHk1;m-7^-hNcYt_II`i%RW3cHe3ZF$j`0?xgaG|=3qF{#*l4qqjc&g92F_e=Aki33w4!2s5_I1tX{7> zpS|tg`WP`85EH|SnA(Ojg;02v_CrPOb#b!Dr#I}DlPpvg4X&3M3`4&U>RqGeRI8_U z6S+k_g{F=$ReXDr-gDO`T*6FiGCN2JjM0Ew7 zq!WH(R+&ZX(5Fl*dpBN?^Yut}F(Tr2T=znqs%PH#vaa$QHf~EU?d*_seG^%=+o@1p ztjxw8%Y_|gfx4b*^9_5_sko1W z7|HrBe_L>TJ4uFSz0{sfto7??Dq-GU7hWNw{*4%j$__SbUR& zkS&8USDXE5*skm)iTW8{7FGw8@J?j2*3X#SvG6=*Ghqg`(yj3h;)JEHZp4WM9eWC1 zl^+VRycR#69?vSB45ijxYQ-)T)GE9*)jA$Z#&pO6z5}m8eXE6_z~vJ@w0fW3>5^4MWWY3@5L+{j~kq zr>f~a0}2I+YYcr%W)MUM|1*em*U4AX3_hvW{hKQ&FDYwvCMSLS9S__0J4VV3Mp~I-CZM< zBQ%ADPKX2gC*KDI;(v*LA}FqAbh>%VGJr=Vkwaauvcvz?L)^g$*IpTj~%s4 zri{a1(VF3XWod5oJmR=m3R#S^rv7I3C3-Y%0(s7f-MdPRB3oayN7-UH$gF&w536iV z0=B_<2i`)fB+?_v)NNVat8^vzl8T^P__5~Rd^OF^gHtHH*-{Gfeu)QfU(u-5V~kmj zP%LYf2_4wk%J^_Zf1-F!5nUvo9Hf*ZVw$NRJuKcxEgQptKq4M3<(uHRkdNxWuKQ~w zZ%-%hO{4SRb%C-^{FzgZ)X8ob4OPt0k|}|yeGjI}H;uJAnL=(ZbVIkt z>WuoM-j-VFlj~P{?uoOb%s5*vMj~a*b@zAk^G`Hs{*Hv(K7H$>DOcGMIs>_S@qhCd z*{E~tbZZ&E<<#E{cF4x2OSfG5s2CFCXdy9fw$$8c=IWA_$)tUDs@oE#`t32km@-Zk z9DK~9SvQBk3rKJ~z7KAbfqU|YPHZHdHs-rE%Ei5kqwZy!*;XbS#?!SNJ@(IH4=q~y zyGnTzR9TaXjYIJ>#06b89hgL$1xC*_cjk%)64?a4ByUCS>%S_5%40E??n}naKo?H? zp;Y$%i&DN6bDE4q|>E_i{6MN z)6Khuo?1<(#An=oSBoTZyh-`}1;$|{a#0q>x91@el3PUgm?gFA$D;UXy43UM3(vse zU^{Hf*b{7!x+<&6-=)5S;a6Rs6)wH#B#EXYI~++uIw$9HtwUg9J9QtaKlxmYn!KhfXc^7TO$~ zcYSN4-cO2Iv|K!nrYR-LpLf^6jXkM0{2R7Pt>3q3y$FGT-0|kEZ2Xt3J8`kEt!`{7 z`r`LPOMCb&=dEikd25B-v*9+sr+j!nWk@%vo82HhQdy z+-32sJMH7_%f<`gqf1pe0T`%Rr-mf1@BUd_3#wJD+qKOjd$QRD&hRE#u@?eX93`H~ zM!1}(W!EXHEWd<8a*Y;AAKhm_QfoqX{||~sU|bc^N$4+2^YIS`$^zZ9{}BUWTk$s+ z5t^T*S#89~@-jfPnGCB&5}6}v$Gxnr8tRQnJ^`^uIYTo|-S3;^s29!F#?jD)Xm-Um zliYMJr4y#l+-*S`yS^4;!T6fz#kFZ9x2JA0?_jgJ=LyD6j@sEnuPM~4G%flg46xu> zLOjv0QT(x_>gYQdggUoH>X4nIua;ahD1kIwLS*7;@gku*#Gl*CNC!*iYL^LTCZvX~ zExEOl33s5P)T*fEx#kj{*Of@caba=4-nOSyPpzT#Z-Sk7C-^#Fz?=SReCzlsK9WY8 zsIQ>TwQa*Sp@TW{^4Drb{;`bXPU7yH>?z{Wp0csCj>W`y({ZYgEU}$l;1A+&&mWDf zkDIXSXprDkxy=P>^)l=>vUIr{6OAryfp|wi?vXS!8w5Fd|3yywt07o8%h5C|C;YPS z+Fl$Tav0Km`k^{HNxAj&L@_KP-hpQHHNjLx6pyP#&LYDkN!~<8hhOOH*~Ot9 zX~5a4MJgGQkjFI|METl8X6g__DUC?O-5LT3Pgv0KE zGBJk12#`lYfHDl5_n)vr`1H>uA^qQg^8F>*sw&PNNnY?Ygxz16;#C}8rf@S=q5Ue@ z=)usphGh2*FROdZot&Pn(^Jqz{9XBoa&^tKjm3w}E&0E{*V&@XWW>sU! 
zRucH7@s0~U8`~OFP*SxwbPBmJn~%|dopSfM-$7!9^j7~&Le4r zeRaUv))p#~4DF#UGa>a^0QK!D7L)QcKfR+|U5-a^B?AAmXGBeqJTvw;Qnbdi8odjD zTNyb4!(D=+-`lelSx=9xEw7(n@Nq(xTf}m@LT*~4PJ$-xbZNxy`VigFQQ7iTGp!@% zU{;fL&I6KVz`AKZ+3(w>nA`Mw=TV7d{EbC?hH4tK%o#*oobb<`Vi~$28%bU}Zu)j>FDWd)%UMFuIAv<)W`ip2lU=eK+I|&}QL!6~Xtw2@A zoQw16R+nN;@Fl1Eq4SRB3Hfc{d8xbG&>FN2S$7Bt#p}(Tr%+CCj z%k}$grmL(IX{^T|72;=Q&0Uy}zX-)Vl^F<>!$=@7xydoQJ%MsQbJKZW4`C&?kP8I&_=K7lfj6@8J5d_s7HHY+Qyy&RBN93@eS%_uSc)M zElRR1NuQ&nUF<|^(hs5AnDKv{ctXeB=xl*bGL6Dw<+Niz96i19F|n?jC%yu+yUxoJ zd|2uUGv2v!^SalpP6ap?n&+H}oDNb`%2Dxf&`B=lrJL*Tl%ae(c8pco8b-)#%%`#L zDqr00GxCuKw>>|B1hlYvOQl}oG;KHDUUF?D&Gd&SD`LWL62smkNws>iSs5N}XHhkr zhdO$bj4U{E8`#e>=DrDF8V6!4rdRr6uVu^~c^4u?JsS2ExkC#oCRY*ge^iqRgnz0Z zuyNvlt$rLHt7$h}bM4IRhtW0_I3FoAmL&f_ynO{wmv7pxASkGGN=rxz(hbtxC5@DH zcY{hwcS?7MbO<6yNJ*D;clUXIpu79u{m!15bP7mJi6x4FQ5_OKOj~e0S$Ck6wMtLy zCz}6Jlvrtbz%kLdO*c`$XIY1dYZ}JEMiouEj>~c6DxN^&ESsr7x7o%{?)3OaTHr>M zRpHEuq?MBpN7w#lSzF~&N!DOo5i=Kb;IL}8gS!duE{4I;;_WrQ&G760@*+{uUBLx< zC)62jHodnymNGJTD!5*pbF^x1M@F*+-kxG_-m&YUcD?F8a0Wo!Uo-UpvdFS}L?~ zMg6MHl|Hb~iN&&`AzbMfTMUStnB`K^*jKBe(PZ~=~e4FpHOGB^Z?!FbR1d(?26T+a%iYe>^5FJ?&hU)ac0JHN7;kr$HR2_vmm+>cKhkr>=)CY zrkF0snZ6uco;de8%i&yyyrjI(aO(quWvV*xw%t_|GOSxTCB0qO#5OGKx&u3u4$ zGxh*4ySl@akX(tf`jg*QC$eGJF)GrN7xp;zZ0k|^R6@PQaDhI>defVZVj1_jrv?Lj z+Z>^G*$+97A3b8lZMpUxxeyc*Qi#Yj`I5q5Uh}D>a2o=MHmiO8-0afI8@;z%F{pFb4N~jhy$(-{fKPof=VQ|eaG(a%S9)_w_4gqt`pMmAN z{iG}-jDP$^k@6(s&l-u@R3D+HQb$wiUCg9ubbb0?W-B$SB~KpdmOOFN?}wvO2P+Rd zw_G*Q6)K<~r-bD#l#b>KR&nr1r{kxe_il*%`k|A4@ukZaViJ}A!6Yso*S(S0I{YH7 zRok~|GNMIs@Ssj8xTsvC;A(OFKKb2^tkq5;U`sD;unYt#f2hCOL_JFu9;W9@xr4>z zHy(pHi1mMv&(pvumah^y&O~J8u56&aYwd->8>{o?@w{T6rr8wT%b7_M z1+g%a_`e$5H5Qm3qK&$M?>^ zvO&CKiSTy)|BS(Z_}c?Y{aX)6j)m@Vx{5GS-sg4=Q%78cD4MLZF4*-?$)Ff6`2cSiC-q8imKb-dp>v zvvShcSLkEf%hlAsquz+L&Wq(Xk5IYI_=>K1rR`PH0VbqjF!c_o!aUL* ztf_a*BH7xJ9n}AxwV)Dt+A3gQlg#_mK}~E0f?&J5jEH{k1%~r~_S@BgX$Jpy`7X^7vC`Uj$tPa7 zI<&HkgO5C$x$XP*gz0YW-Ll^{YsHK+oJ7>7`g{r_AD;A&?u0V&ixjyaWdUCBX||lO z7MuOY=TBC%pFFZL#drUX8sPM1=Q!!t3%2?fow`Z91cS*x2)J955wWBr(+fHtMDLLG7ZdUo+kFonJU$3oX9b0Y2=HJig|x+lJg4ofv=$ju z#W8<8YyTD*Ukr#gqRPH2RHFArIaq&e#>Mg=yD+8V4_M%9-5WgCij=r z&=uK2fMjcg zYLh=rzJh|kCEDPz@zDR|gE8;)HE8&}oG)xi0P1e-y%!v`c-j4u2C%y6AcAOy6jF!) 
zy+8^9ujH8M7PB?dguJ6|yI5bJ$gL&VuMK?3@G-i&?tP+0&+4>7#r`IPmOIa3lD1rB z)gF>O3MFhVQ8xrBU+$kptiwhFe5}EdK7~|BRjDyDV*IQ>$fJb19wzX1QGVzw+?^kc zk7O$Tv^NHNI4Sdo>(Ht#41AD2g?og1DBobMxrn^q)f)3_9ok}lWL6d zi(Q*eT9#SzgTu}WkF*ppCvAZ!Fi+JRwq#a_N-3?FuBiZzO6p{-?ILTTuX5`Zx{zF= zN1sAjAhXYY;Ci)cJ@QXvC@AR{M!N~iSu3n1MR#5@@$*kx15Oh}rh z=y*t~9@8Xo^pYz!rY9+L(;_T!e3q8(qa6I^eYCyv^~Do~5loa>W9SRgLBd;@O0(~( zNTJt)rhjx{^}wK4LAILWEaOjB*0&x$Q<#($2=~He^`8eqd0ii(|Xb&9*0ErN=m1=J{DxDm)uG2_jDHM`d zCw#y|zLkPyrkN|~aR$gCt(PjS&cF7Kyy2&R5RMV#TuS$Xj+`VM`l!1dHTR(1ZV9mz z?RVB-YwpK0BFwv_?cL?0CE)?q^%da3qz-+cCwo`xk+d@+A0FGL*%Rz#XB++cWbk-3 zhjPL0AU0plkK>866sy+LFPIx+5;{dP-trg}Z@=i~=k9cbW*2#0r9Dz`F{!kWks|OE zeu}>6ARnf52TY?=Iqu)**UWsvRfQ`ZxJ;^6-4zh2) z77fY{f!O%WzJw1CWG)lpc869gjL%RLc;&QfhSN`8hTtfPmEMd`sco< zTCum>*$Nx&q?)_bY|teEM%U|I7*&}j`e$!OwHkk;r$wUkrg)~xhso?VuT_P@IBknH zel8q^QkYAAOJ!^Hf^hBRZ7zc!6mkwDNZ-f)0Wu5A2TH?X$lNP3cxkiE@!zYygH-tN zB}&vPJwn-d!uLC#Js}wpPM;S&{xa!ir@?C##xlRs7$kpdqnMlnQ;n(*9x``+xp$kq zMMT62{Nv3Lp;CSYV)ap`;3Dv0VxgddCJ*q5gN(F&sy08wA4V3F7pqkqpe9>myYbxo zY(wRIsp7lc#WyM1(l|3!<6rMfC^*C-QY*_=`uhFcIUfM(a}(~JmZfwiNZEApC5(>Q z=AeZT{`?pPeGp44z%48#Da3Q>a`O*rNY^%m%(`7L+Ij8 z2C&BqoQn=b-@26Q?b3}1?b^9Y`{PKEvB{lWozHrvYx5+3ZCp`5!k3IDG+Q$D0S3hM zJw>)5EM^YUP9!Fc%LSVeI|0Edadg4-Yw9j6mX&ENqZoo3prj^ z=B8t6{XvkWY?(Oo{dZ`W5bo10XJ9ifwdD@v$<1c)3*9U%0&gA~yZYSIuCrUQPFP^t z>5nyOvQ(x!S3cjq2q=Ff<@ovXY;7-gHV2Z;|3-b^QmdZ-AWN^vu}CxP$%x&FlRMrL zovJj(%E&=QM{>0%P*b-c?))x)@T z7NYlXch&_C&Zu{O$LM?E_o>Xr*?7U)N3n3AGweF`0Xz0BH`e`yPhjA8nSIM0vS4qw z%>XT*#Qe~AMX`(^ufrf9;id)%5{4t-W2hfLGLzdd*@UgL*@JFR;e7#rUwJU)$aM8b z(d094T@m&}CwH!;7s_g^S0BT#o`f=X#sS~#?#PiPnhXkMnwR^s4JzKQep9iSN>V%W zkjCKM%F0DCBU`RS&f zvRTv17k=?V;zgG>aTs&$DtcO}V4>0rtTb1HdWEV@=Jy~>DTrSl2{t{POj@hlj)k?O zyG}90VK0?xeiPAB5Wjh(`A5hI(Z9c0YI=KSsnJMtWpl#@s+^NpN?wa!uv}))D32D| z&(_$7RtN{`nh5E9fdMUxJ3|K1&jmh`qT`FG+N_L9w!JUm{phLAD2<56b!Hm)9o39P z@MtnnGi>tt`Z|PRcmKnik(kZYoVxunQtcp)pq_>#X0nq#>I_DEKA-grykpvG&p~J| z$(D>vaA{@~I;o*&{iI0@NRV zMtan3$nLXWJEqB=dvTOmmS}v~j=u8GHHjQitT)FJkK_@MURA|co)fb>Rf$kMTWe8Lg-*=Ij^{IhlEF)^y-uQVZ^~%4Q@Ms8TsHdzaqmMvHXS0LY{jrck-+j0W0p7t%+&jcV;s=858`Lamtc)iyUpeXWJSzBEQpPT)ZZ6sEjlXQzH5i~ZQW0Yx~C!eI> znqP~w+xrQxyQI2pJ-k+<6*c)Nwx{xh&eH@R~6vr&S}+oo5Bd3u8i$qvbR@@<%d z+$p|ewnu~Gk%ub{&pF9;w5%^4L!5;w=QG)Ri=+AQQL*ppSb8o9hS%4gDT2)Q z8$0J#3}uuH+$_=dhZF)McRG0yF8&yBy-J@2Z;k-=GA#>ndRX>k zQ7C8O*b`hMvZ77m8xPvb$b%rz*EljuRGO?{G>xWqUp_+r`|AFOib|Ddtc z|CRC3rY94|>#=mt2i9S25e}B=6k-U%-fK|G=VaB?SFi0B7eyd%kk@&f1-{{79~2F1 zAC$zIcn6Rs4LQ=DEeApp>63s05J_q=5txWMm-|eX; zDIopuy15wrZ&nKw*S%fTsz)kA^`2Aw@%wLQ%IqMy1@FUW`l|S`eDN{Vjd8L(49%jEsn9i(%CbK z*fgqObqU@U%2+G6g=BTswZ$~|!;^>|pZ;I9Q4^Zy7 zsdvQ}=^>jl!i?G;BP*jerH*ucGM{p4>uV>^swOrR<8IT4Il@nt?uJ3FLYir0d?5vr z7ZT;mK1AI(tc!PyRzCz-Az~vv0Q;Z_(Lq_{!V8LBS2UME)L|3Z$+ucRxzlM&Gut?; z`*SiL;BMPs#E~n4w4gZ|ip*|MTN@|qP@&2LKedlpc3gZTbe}vW3$Q#a^EWXYAIcAZ zGk0??q?JE^blUeu$;ny}HP8KcLRPHQfwfdL?Rt??VX$Iw`?awi!+MNqY|f-D|IgP{ zP#-p$`jl$mj$CXbwynu5rWQWF1D6hqX?9ZGr%f1Bn)t~^W3$c{okq}oCO&`Sg^X%; zu7`TnB+zy*jZ|S5R^Iwh{$%`xiP_Tr*sulfbg8NGq}$04 z^Mq&k><;@a9%Md5jzi?1(jF#MAw6>37;neXJ>is+eGBv8{cUmML6b-Tn1wm*IlDFf zJX3FL@fiX-n9z0$fx2jt>MSv(2&wUlt|K*q^+u=K0~JD5I^;lljcLQ`;EZF9ea%nM zulFiUv;qyv+ofK<`+E|OLhYq&$?bjC$+Ekh%u!4i8~M*vNMe)ud0l?2crs#X`&n+ZirP|PCco0DuFM5-F9Mro!I2zsNczX z@qTLj2DMy!f`?_3nM%3L_l+_NB(JNCl@y%%exa#PHTR{x$6oS#?Ka%1_FxD#$q5}a z9MqJh%`?oS*6$yyr6D;|`12q*G%JI%VwJ4yMNQaauu1v(8#nx;>78%zcp;FqjeT?J zcAdc;N4bPNS%%eR6L-rPV+{1&R;D&NEyGAXo$<5KYRvwcY9j*b%gD9xsqxDcr7?bR zS#Z_AZUi8s)F$bat*ReyOquQyzx{%MOf9S(`;ys{k_ogaHN z{{6zRjW8%PBez3g3jeLg`v`WG;`cj_c|+}`#j74uE=?&s4WD>$xuc3Zv_{Mm$Ng!* 
zy5*D6=d0I6Ke}vfBzr4I#p+jJ)v>68AmLL9AmHsKu<}Bb-*(uuIBmiMdLeQfN%%Fs z#R^i@Cx;<+NF{SQC&TYLRh<|}SC;N=DUx;sGZu{MNX;{>yM9MRz=_zkIK9O7p}9_V zT3Z4sUGG`k1@zh=%q)j1?T8Tcp0`Q;L=L@uw#I8K3KZ8CNqqVJ>0=UG9iSYELBBgM zac(MrgvoaC-lHS9))pU=>J8BXOg9NoN#QLA?~y_jLxJ!8o?0slTzD4nR$5 zO2D2)+&1IU&5+)0PvwCmdOGTX2em_nJ+DA5*bo;B^;92it+hk*Q0J0=69?pCrSC3w z39LKZieTQT;z}QJG#*Jh+8|MEa1Pak)K6y4F){ zL<0N{8Y&B!+CI?^=Q-Aix2b#UX8DBlqXzn2G(j!}FY}lJwdu-NhX_U2#>IJ>&97%#WV)F0^7aLaazpGE z`MQ|apfeDz9GuF1rW4Q{u#&1$*{o+p$ffw(sZ?@YLb=|oH6g7)E*j3e{ewb>@%1A8 zGrCf$j>xSU`3xo3{MjZ~(Ro(6E1=%&kIpT??7;CFkay^D2b>0s;Mn6@E;`WdgWF~> zD){jabs0WjqID2MA;v8nJj&(4n$$;Yu(0WoBNs!~wFOB9U4T>&`i?84=Hi<-K5WzF zMIY6}X|zAH09H$!ZS6cypB*rOK#S=Xxf!@S>^Id~A2FDpnxhnHf0ixx9=X;sFxMw^ zSaj^p@f96Ky?OBd)Dx6^*$n-_;}*5;Xrv9Opdrx4#t{0@ts;5KCn6cMX;U^;5UJNy zq#^!0pZ5W3@V)e1>ULGnE9&kxbs_5)RyrZIt5;;IwxD(=7}w@*mdGkt2qN+D`rOEhe4wfkk9=ygkh-@e+)t327n0TB_H<6y-+fA*W*Hq36T0# zSbMlWaQ_9t(Us4*KlLPEf!xYW-4Z3qSZjA#<78HH_Q+5ge&TA^hl7ZgHIC^9hZk?; zgrz4fTCAwFs{=NpJz$$&PrTW12Q@(`>-+;bgLHdH=98##++X@ah_HEVdB|j<)fjnR z?w&`#p7^|8{k}b{jq~$h)!0}IX~MN#Jfob4u`~vSLU@-LakK2k5LFMAOf3!TGKd?Z zE?SC(4;131yOW9Ssl?-1USvhUSmzCO$kH0emV?)qYnnIUM};+xru6%;L4ehMxN#!f zU)(&}*{h=2J3~m0w3%&QaFdE(5IfX9l%r&j$2I7ql%m_7r$5Z0m@mxT755^RYN8Y5 zlZm%{zXbRF(|bGff$0!Y`M5U#ys_#ggsWx zY{VupGK=0q8#OcQ7X+V%9NDe+^VB(C?#uaUD@u3*6E-gxb6ESUWhjXWRXGkG`^%`7PSU^Eg1g!*g7Io|oF*-Lo`SK{rwUkP3o=GTbrHQ!rnlAj~2-Hy{ zNgS22KR$5@tLV|m?nmWi(#XN^p$tem1+wasd3w;nXbW<7$q&kyqF3X zbda=)R^yoP3VxHl$qp(-Y+J|UakpKk#%JQFlLN3yG}x%eKR+Hs#C9axTSHh&=_DFY z`WVGy%VO+UQ@Q-80Z9P@4Fx`|u!e@6IgQC5^bpgF=$4rVoQ-49)<5Cg{0M7xi`*H} z-$^t&Z&9_UaK5XWc3*g&6%gJu+O98NQ;wh3gSUaH5#->X*r)Q`q0#Ob zoqq4vu*Bxh(ZN@bLaUQw^ED9-)vTQg)ht4LvC)sJs1D^L+ukPvpwO?>eumJq;)R__ z6KF(oAPpn9BME4MXTOATHjVy&cm|(XQ#WDJKZz6<7$D631u}>o2VkoJH3TH&!UMS4 zhFDPB1_qP;B7h>_A}~vKU07G_{U`1{RRolo@;-%dtvhtz6CV4}m`;V=%{F=E2OtQC z4j1+lK1@&9U7O%OILhBe(yzc!#m6`?>w7=Wp??E9#&BVre)A96Dez`Jg(6IG<(tk$}dUt|BFg_1L;ZkO~`29gim1rSM zt3m7=KeUY}0?m9azyIV6u4VlT8|)lj8&1$1}6!t3rj0SWbr2)rpaTscgu@E^MB z7auf)NT~~Krzd#!6_lHjDnrkecbB#W*!!l<68++a29e1a$4+qdH0CWt1wrHXl%NOX z@N|HkC}%-(f{3eW)j9BQOK+j>Fe)JHyF@p5|lkXO}RLelHE*yjk)Gk3~&Fyl07 z9)3#!fWas~r0#@L80fp{fmgNi_n>O!0$vbUcwk!nJ~B{%L<(3*F!1!%7{#CRGcM8F z@<^33wr?cwoxp_Eui}9Gc5F1r>yILEd*G-8 zqF>c)GHhDuCcjx_g>wq^0(Au+iw+2$R(8^Q&v}}`)q*L&V4SCOIngu7|A7rzw019q z5B>IU%G!y_f%>EO@Bx22&YrvD?52^+ysMGC2N{642pxw4&lsea(R4ALOYs$Q9xd)d zx9@q{l)$q31Wu3%7}1oc>R1EF15Q#v&6EZg(=piIV8f`YX&~Q@?llCX3=Lw&n%8a( z_L5C^aU`MC(bz0%z)7TbP8WPc5ag<;50QkW1<~z_!6ZntCUg0JXU%B6p4yN0%!7LoCPe|>&#D>aRuh|M4H3`p zLx)QMd42nBM2MEJQX>?|C$j!pFT#6Bp}m&^#Gnbs{8zg19rk6$c7iG%C_3^dod;sJ z&3b1*PbPc#JYNynzymYWGgjA0<{z^G%nH!%##(PHi2bw*gs4a7{=YM6+pItt?f#~W zsQ6Hfk8!xz{%HO^xLx?MGGMOVnjmlhm}^6br$71jGU){aqxQFE?(tIN2FG0A_$*N^ z4=t_|x1v*Jl4qTR^FA+cA0fi)DPK(FmTXL;)!~Dz(MXFK+b}H4Y6YOgWd65?7u@?j z=!84-I{4N1)o-^c8U@osW3+qk%^yuAY9ZI@0GQ+8j2QTAU|!>@ATS5F&FXV!4=}GA z2Uds(>k#GlzJQP$BfkH|K@%r=#e*8{zZoImH_=M)kUv-B2~kUPv;SNviCW>~RQrGZ-A&zt;(_7>CNHiJ3xa@v$twil=3oZf zw?fDe(sc}MjIBjF@J8Mjv5;5$_P_dSn`7KNM-N`?quwh2{-B$n`Zggnn0K6J%330R zQ#?>K$x0!#uNcLm#34>{C4q|Xeq)Euy$5k(eyvgOAo9$r6fdj`SI(451a}KHAt@=I zJ2bG#C=3tLZt$^cMn87FLw)H~5As5k=Uvqh!WhQ7*5PDm=IzcQvuX{N~%jUirSAbvKtWEyMKn#B4p z0f#KWcH-xbsKFzpCxI0>r5pXq3H3BiW|3qve2o)NGJEJ%i!g)T9>B2dT=_W=Oh{fJ z?xZtf$(Uw;OsD9Ie-X;mAt9INvy^4}wAT*qBNqhl;4;prk@dQQ{`b`GKZ@Klif+oglA%gMW{qD64wBE_b3m<~~1D*+zfV-7ME!8`bRQ|9S z0P$U4((&sRbk-ZnD$Qq~tGAO%MMP1>=JOS8@CdKT=Ts}js4!?ZxP@pXGYD3&o8#JL z`YI)uep7{?YH-Bwjk^gq9QmRtEdNDAh!XN7(Lkphjzd8Ugk&CY-1R##%ugK{r;4+_st8;x` zz`rjDaru^DZ?SQlILC5Pn}Ce$QUvo-s(&MY7)F^u&gAatsnfaflH=jU1R1&~oiwW^ 
z1&Wl(=WNWX3BL$*V=i$BWVqk<2cJmP5Scn3;9L*R-751v=p)P$L^$sg;I=R@?3?Y^ znzs7DZXdiY+}pDcnHRZr9zeX1l{5frNSj*e@=)d)n$j7S__9a)d&_}o~;5bA#VK&DM#^%2Axb}M@`%6MQY-;Hr zV;3P}0U2tg?i5d9g3Xdj{=wRCvfZw*IXo5?;##1b{oLMsWHgp_5D!^jXu|(#wc}ns zq0#rQv$}xvjC9JkBU6em9l^NO7j)Bg?uo|)(|nxA=4q~jo-Uutg{b=#%>7n{NT%9u zK|k9qG5wDU_dgvnT4@;iA8T`$2LJ03uMH{?rlh3}kxt?jZZ1rb zQ9mwJX{Y2ii!GFM|0aLV^fc-V%7QNuwXQE!rmyskbrO>{t5J4u6whOX)l*`FkncF? zp7T@m=ckE|{#&J4M`GbLWR0vw`O4_6T+%HciA-BxZhRh<zQN=l+ zN9_cEQV4CiAq)5B8zZ4uYa8~olb}e6Xv0%OyK z9l18k5u}na2up3X@yA$xvU(`dq!h%M1b*Z&?N z_gRviQe=55O5r?~+!d-T;aR;LeG^*-eSuOMib0G+tS;f_cR%a!sn~#S)N_mqbO>L zloqq>D>@mRuoeO8g*GII?DOc$1E!YN+MmCtr%}iyUW#5$8ih;;$3=mVGk*!6;O;~8{NDHy z75~*j%aUQbO8^kwxCg(j#howzr?0>8!0$AWb?TCE>wR&WXi&dc<$s>(Qp2Y*`~D>= zJHq-ULzZ~FuVD|a2za{Vr8glXN1U=5`7e~-^w_R0@|;_r;Z1njS;{6U-{VXu<{Qpd zEEyC~8_0i|@%giPf4E6Zfy~fKXb!iZWBxv@OSyih@U+@-7BqVhjPThdF^uZXriM^+ zv8E__AHhj&bbBikI3GDCh-DS&A@S%EJiI^QT+N2U?+ z-oyH2{@r{exzuy`P#?z*hIy9Eje-a->c{S92j~;dI_k3VU0!&qHgcoO?iAX`K3zJR z)d~khrLFi8MmBSSmTRYLB;Ilf*Je*E6x791zkgNLI)k7{(pwJcADI=)hhq z%5<5y)t2Xj-65~|FkP{l&8B54joOjWJ!Q0ENLP>=VyHAy2rNeu2Q$SX9V@t1ReV+1 zl^6XiYj6#EO^3oE`VmGe0m?E8wZs!0tP?2u(~Yiv%h!ACMH7`%x$p_rPX`TC5v5$P z1NA?$Rhst)C>P#D(`xYKsCwuSp{XE5I9ztoZjCUn#6GQjvog^_T6g_GDN(lmT+?pB z8D(fWG^@z9Ds)-jSwXC|(swvfjKAI&o|AL0YoQ(2T932i>a#=mc8I380l7?qk4xoN zZ0V$$PSU2M3qj{mX>#*2-A%&vRH-B)gME>F{sZ8c{JrwK7dnuXyEheV$b`fW9}a~8ixm>QN( zDEGEzzIrZnDObnN5V5(+6 zQ$p4wan$7ZTH8k!{R3Y4f)5BjyN8)om3Rlw_G!F))+KDQinO3mHOcHl+T=ARxa2 zxC9EBqydm2#Bc9C`_G)$466PX@8?9G^Ya(RvH7kEk$DmaT8fQ!n%YT9;elZ9TZs6@ zV*QD`!GtxJY%!v}#eS3_it=Q>>X@fD*`t{3w-ZQ6;<`#~WZ#k*6Eb8tx}j zrM0$=Vf3g+QK1)!V+5(>92)zCK361FVNd?0Tqo+hK4g)bPp}N%!39^9lKO28hU;MU zC6!!ip|qKk&ohvqh=&wH{DnP(fn?b3eTAa~+ju*lzJeS~VZZ$EC;)({fBtxPLU=ew zu-ZIyJC)Ak{5`E^VEE!w@fb_HFVY}osH1HZ343ITz zF*m3c)*u$`bvhMBm%7hb2>)B!BvH1m9beLK0~f1Xh!`g7j~8hLmBdFPLU<)5I{NUk zqj5b)xg6w*Z)z<{HBTE7HgXCy3Wa8g$1+Znt*~ zgbzJ5{t{@(KhrBoORk7$ET5f4d?q2=poj%3^d^D}Y*405>wd0LYX5*99PnFNTO41g zTVoWf7YJTwRAx=VDl^%*A%&Wh)MDs8UCpJ)^CZi&$nwiAthB|n+@e*qP-QgC$a^-OZnja@E+cU^ZUv-K#SoGNRT>Wm@W%^8)r;N~8}j zy|?~4Jkx7Jx3~2yaHyq2YyYSX;UPqw{{`}T-2;dm-V1w9Xt(JnWqq~liA5Hc1r$sm z79D{NB>?LmAHn|{==G|F=?>`XiUchw`}59|mi2ykEyiTK4Y$Rc?=G3fm0MfBl!le! 
zBys#)LjWbOR5LR_wphR*ND-bx+G^t(foaRnI-emrrD@Ag9t4xoe0E#Q%q}|>;Spmi zNmLrgFX!Y#m9Gp+c5pz7m33X*o|z5B@qm+SuZDh)X!5McW63c8Iw?l*J)!#* zi{=OlK1;!LNW$Puuy; zmtSiM_2sZ+4iu31PsbTDgzFzY->ZAz5@23^+$9Dh_#A=Jv5Dn1`OT^Rt9n7c6Aw_z z9ESe)sUnU0q15^YyMk0emC`}byr#@*J10_UJj&5~N|j|ITa7uD z`E%!rSwZn=;wq|?(UH1nT)7M1X+>xKzCfvNqwSIjy=YEX9-Nsi1a@PH4w(*4(N}K# za!_LLY5U4g$Ib2=K`Mrd-T@u=()?%Cvjpot2eG_}{Rcnku*XFp=xV0Sr3cE8x5 z;c(ke9WTs3tfFE`MMQEKoqEyjk4v4YH$tP#3EXtHjn`*Ev94Rq7Tqbgp>htzdVhQ0DXzT0j)Y|KBJO2x+(P zfq^8noGJwjrw$bfzCg|;c2$+tjPB3o zHaIDI6zYZLt&PDwi-`_+2B@*u#%9V{LT#>amH^3cZp`*20&(hx^|4e=MzQ?olObU zH4)>#Q~kP9y{=RLVT-Hb!}V0nM~CfMUFVL%hXt$|EcTqR%x(Va@x&Opj-2Llc+~#* zNNn;P1iRVa_Po3g$Tbdromn^9PadevTGyS-08_ep)h7bG_Xetui{qm>_9jDiyqFFV zIB%W5;g3Hlt-l+r|K_|K=sKwqnqiLIPuay|m}kDry}!U;BO-C5RCY%ue=g_eJwJXi;p!A?=)QxP#1DqA z?6=2bfN_wc2MZVH`$0BLxf7&waC`>N4s0J2850ok9`j5vfO!5;_oO;{ICpBp;ZjJO zCL#Y(BzHQDs*Uqzm&3tCqT-#0ZY>7JBWK!v;bWcMuZ9 zDI>BZk*`1K5nxY)t%z)^}XC*K|8uJ9=w!jc|LWc2NBhpz|7SqpUtK`HvJ-@6owVDe72 zW>aSO&sOF6_~jj-n_GjWUaQ{v!)L-NLG=^!)U6Aaq3N3XE-<_DsJtpqVrV9xjdSWX zpEga3;KpauzE2Q#BKuI40*KdU))3ed@lxqVUj{(7JX6NK`B$}WP`5#7Z<;njD{L zgbAp63-H6cywO~ZrZ!obx{E-Z4hijg+1Pg%&lV1kCAX5@Oq6p)@NrL~BA;t)fkWE8 zBGew-_l(K!isJ6G#}zs&$BJl#=VXNNmcZ8-Z8h2bI<>$#8BXld=bv)6SBrKxp zM)VU!*Qez41o-XT_2Q9Q-8p%k%T+mp_>y!a)r-by&j_j>L{{bghMV4O-jS+`SDA5F{jfMDMXe1&p;3J@kgq={3^=mo3z?W{qs^3$P{+z>K zj>rov$|&4@R6YT`bZz-!^xlnYaA5jR%0byWrZr0Q{6l?1gtidTrOvf?hu145`IGyf zN_AN618YqW zK1|MidqkhaJ3R|HzyrCOW$(R4EE>Z@zTKMp5K8sy|6L~DEQiK}7FZD{%f?YR5W%oM zH$wTJWkV3voAyJUh`jB9GV)6yonvnqsjTWm?At?mVDAy|Xo=Rsam?*(%U_XJJ18rb z9AP&)%MP7dHt?MH+y@L1ZG1C|b8eUBf$TDv_LrVPunxNeh=}jNrSn2Od=!t=gGT^ax@|?+?HCint1)@P z`?lTwx5gMpYspBQwE0o2F8PO?!Ti{ zn|Nck8K#PkkG~bfh^mjUd5DVfBLlO&y`y6&tn8yuX=j$=-2Ts#xwNM8Esom|JRC>a zxLExIsvsTw@q8b?-3#MtNWF4w_KCQ?y{Y8RLrL6`oT2=`5JQYrdG7 zpwf7TCNy~Ylp`oY^j6t&Q=S+z4sL+a!<+XXe+;UZ=nHlYoS%QUGpTG(_DtWt3R%9I z=pjIEw8z_~hv`SgkX^XBZUA*6<9wywPvI7r^ixl7{Tv-WD^9qWa}7% z$#u_8sW~bItFBqBw)_EJZ=NJk?WmcywDyY>6y)=g*nmwQY_3QE9Or{r=f5rY*-mR^ z6h1A#7^KF(2-(hr*KRI)vR^xtB`srloVa@6XG+j;{++1KY00NVD=S0W53J0+W&jU8 zxtiNg4w$emGSl-x-$`KXndko#vo|ZbqU^MI zDj?e?w+u-F{H2-*3iw#md-SbAfX$nR0)w42uMJH)x}_K^!$4CyQ5mw;d^_8wxo$l7 zYK|zh!?w_rEwKo+fwvbYjL!Y3xeztJ7BOhU9aFBpWITkZEf%PXlXug{KnZGKRaf-- z22B-?LKmpzN|~xWcj3EY=tWu;^*Hse$~@p-oSE`WM_vz|ui_JSp#3Z@e*|1iDA#F? 
zrDM@SY@b+bGG(QVh*#_Wp@;s&*xQLx1s>$m{N{mj`xIyuH5{w?z zsIj_%0Nak$WL1(VF{SP7stNhXb#ujf7R?A4Ax2Ee)HZU&{K2jiKf+DPBWmX8MtBtx z#PD(fLr?M*PU#jIQ6AH0d!V(Ahj&nUS21N}Cj^6=KD)Xk<3A+w|Us z_WNLHZ#+U9m$oR85D|JVk)ZJn<(L}a$guw?)?ut#Sjv8?CJF`!guRAi+tbB6W-03- zI8-2o;yfYVW)COvE`Qa64_ykiZ6#7f^otbS19QrXbjX7)hs0ZR@A|A$d#;@4=ST`TZqGdXJQ+43Eo-xEceAw_$)fX?5R))4RhZUq*zszadAz{YfRn!Q z{O7|cCSwMr1SheVhx1=S$8XU_zSRpE&o!qS-5Y!)R)d*`mF}OKUi)w1C{tKki^mg_ zY5H@=5`_c=g$_~0#O<0cD(pPKZ!`M(OddwHu&to_MGpHQtx7uYW=$*BC%44%w_RCO z6aIi2TYPh{@~wBHE}f6Papn?mU^?b;r(2W0y7aZlX&Q;c79zbSm5<5t^o$!QI&BIo z0)4PTW>RZ3uWl#o_3fdlkx;EzX z|9_^Js8iS0AO91J!S+KtIS!w`E^;wDRFl~n>S3yrR0{_ZKB+-cv9;pLikmE< z!nO7%ymvOxbzI3S~IF+NoeN`zB|md z!^@fJSaT*CbzNQbNVuGKjak9v%(oI9RiQBT^$f*`?zUfe1Ws!G|K~|Xtn}NjIW?EW z{z#RoANkJrOtS)#KvF-+a2tP&lg$bcf!_Vm9W8+Z){>Vr0wHE<6$s1)XD|0gTp@|- z3M?7&=RXE70GI!|?m+%r2Fp#*Pln)UclOg1F5^cgn$7ozi^g6vroiG@`!j(9>(gAL z=;mdiNt+#^MO=#$iy_}tcqxOj-k;04^do`pW!DPp{7Ck-9(=vjRoOO{whK2%=F4^0 zZRJ@XDZ7Zd%}M{buT2&~a=b2-P+|9cYpldmBb%XdlU6C0#@=Y^-58>()+r=mg#IrH zqrcN;PTKv^)02&^)q{_#e-onF#Je_(lBseCpr}(2<<8d#Nl~aew5(cev4nN$s>=nI6!I1fI#`}xGqMnM^-k- z`tGGVHQsX|U5IhtFg{5$w>$j_Rp+t5F+p1`fTX|7{^mi# zaa#?TICb-QdNxx-^}ys?JbZ)m#e<@mbKVJiBWirMx=?J#45Z2NJU~<_HoaELS1XT^ z8+^}3!WToMuU=+t?DKMSKKe1=QrVe8niI%wXFUb}lZ`@)t|iYoRf%vi+H@g>xzc&m zWm=C>L81dShEhHScs;(XM=*rdxk?r_t^aW}l9-22$J*P^uLg?5zVf#OU@eqZ2N)cu zej?(QBOLU0KXB)A86cXxMpcSvw| z*Wd(qhkS?d>z=p!duFER{g?Z2IqY4#YSmh+_F>7BP7h|v9TEYAz3i=lTOt~2x35=t z{4U~)68qkYPQ)hIRcQJaWtz?P5n>)Az1_;YT&qKR&O$5hIyX2PLY+PLE=C=ziv=ZyF6TG#wGg6y z720x(LqvEwrwF?4*hCz50;TAAY^)5|D81NDD)iNZhVd$DxzmsPAHFVCD)bU!m|8L2 zffJLA;RBRz$r!;=$897N6Fb=W{(v_{&1d+3-xvQcu4H67JR#+q5h3l1|C#VtcZl`; zQa6*!xLnlhQ4tA;7z77%0R4b$?82?{_KX1*aqe6Ct~Sl{*JT z`sAkFO3xC0NZ|6=35d=*pn)TB-ui?Gd}wtJG;pP3xQweLzqX!q8i1GUy-XK;Amj)6 z`n<6BnP!03tGe`T^Zyk&M?*J0y`Z#cL-<9yXo1F8mR2nu2({OF0)`iVmO|t4 zQ1=UaRQkdm{c9T~!UWc!9on_}@%&J5NfZe*_qUBI%%VX+5PQQWfFl(P0j@>(YwM+r zQu&B^1d8*Fxe+cjAD4pqPaE_vQUF3Z8MD+b0oEZlbS;XP+dg*V#zF!JLS8NF`7fqy zm^rs3U_RF&0QH56_}9ZPPWJ2iC051Ld5v1c!T%VZaQ)J6e7;3jx#DiSSx|x^1fM^M%Hrg_VW0qz3Mmn(b@e4Ca$3t+W&B znu}EjcYc)-QNG>O1=CiVaL)^bU#6ZHm0P6N5Ipa|cz0Pn(p@C!FYM=oJ<~6K&azSD zZTiy4_J790)*MAVC4t}|9Ex!kZl_Hb?@yi(Tf(v&YkOoo_vImAlT`DiR&EWOp&$c( zL)6F-Ld=sm^BtIM|D?D2^5EuyZ|R!*1Ki1KTh0D-wZRb4m*(%vHiqwkFI))RSHW9b zo`%=ZVgi2Yml~+iMK86YA)OV^(R4Y$vt)-T!}j<06tUt5o>#jX`L&#SaI#J!!kUAS;MN;a{R#wy^0_a3_8qz`F`&;tN$TnWY%v zTQI;}#HH1PK7W@gFYHU_0v_di!N{ZN9^vJ>r0=5F$LW zsmDj6{z8A_GWx7JqorNahlhkruAlS~O6n~d^{Qet8~$8m9vpjm2=G@w33wt`qj;VX zR`K!=(tT-0DTpK-QT2mCjh>0@fmWOSveM>3Zkh!zal4ZbZ^zc56z&;8{OdT(x_$Nj z_RUs^PGqEAa)zYd~_%Hhh@e(D_1kO%~?b7|tb_Zz1%4JkbJ|XDzAMeh$ z4K`CLx3H>HgYw%A&PHV1&bISxAvnO(eLP#(d+(eNIQ6_5y4ygFCUobVyQ}Uyb+!<( zFF}gL^b&zu5SOWQk50#Jo1H2e<-4v{CuFJTLzY<3GWO$Bg1Ayd3%(j26d6TD)QcU_dZZyy@z}0i*x;y0cn546e1mUv`%8 zUw8I@x?aa*`fbivl2IsmP_^>%@{Z?Q&>wKQd`na+sFcsWB@CbE+!vPETG$eT4S?6AkUu9PO;N{67uA@yI6;AItz->UMtO6fU={7-*?5{g} zb6m9^QZ{GH#}Q0Uu5T?ApQ9J}_t0>F`@SL9H^zpd0dBn2(kRQ&FP;NIZftZp)Xblrt~{t(F>@XZ-6_ z8u+R=Vn4uvt*w63-AfQ+)f3!Mh1H`Ed<=q%c-i*amxojT6r$k-YUFAc81Op$ndQxS4ytLn=aZ6a9R^m1rh@>+ zFEkP?$duarGaSTRZ14^eDyy?#TWQKsh@{LKeN3yn1HJs$T@89*sAEt<=UU2~B|<6# zC@6+RFj`+Cib#p#9Bnkf_1y&_hkRwZ47>+I_g(BUf{Ep!`aqERa;otD zjZ^g#$T_(4*`n`Z@p$IBgrK0w^?$!9mK8l8kxBF1?y&)mAYQ=BDQ`1x%j+6nnzL*v z#iv%JrAOfN4!z=aIfupeSO^5}6WvS_$P|y~NfLiuqmHw%`pi5E?J zD&?t9IDmkJ<-L_P0B6@LhoDW_MAj5v-xGol59Z}19>c~unJcGfl#@TmOA=GElie`F zsnJ|udO_Fe>X@H+nR2|A{kcvu_$(e#jRp7Xf7}J25Fqz6sEeGYCz@fT1##-*kFhm5 zq8r~IO;fw4b0$r<`Kj{FC$dP3XUg(1#)xsm zexT33nbjmkc?Raub=uZvCRo!i%?ObiBOp1$397I)HepABhJ@<#kRLx 
z2`h-mn4kIJ{Bfs0?d=SSq5c>smZlUH-)DW-6yT@tpw2>h2c>c+t-K zo=gUV98X%?Fk8MfoQgaOJ7p;;^*BZs6{Ebi!*@a@W|$c^99B$$I8>uCO)h%NOS4OY zUm!PnXRt>P-h7mpN*G~4Dii1K{bk{EN_8HmR8An+)ff1iIFc@>8jS$hTcs=*aIP(x zWE#wK^nn1IPA?Nd!59f|2xyegOJw#Rm&6t1_rDv%9^JKG?}{gp<(bF1h*JSr#xljW z{yV(s zV@NcgM-?lS@cgAGGyfp!NsQ6<^7}U}IZ}P|CpYTZ$I6ccO6^})-%^7iil?|uq4))( zej`;1CEu9AH+=>M^ED98HvDwCG>8L-)tiNk5f+%`qebe!G3V-q5xdN~wouJRs*X9~ zqG#-45$ve7f&hT!lfz&o1Uv*Mm%pR?OI;uq09gse*IWM}(yZF3n=u&#wu+3VF#^~r zD{x?=wjmJOT0ye?(e^rFSgZ_o`-(-M`VfGl1uBmTxt<-7ncZRe+1Y@ zyEzr#$${C87HcNA2^4!v4a*gY{$%E_Y#)w4v*jpfOGAIhUly*qNMnVfr52B7ftFOJ zpfn;V4lU)ZQUADvVsF1>lt!Ud%uzh|r?U{OKi|KRG3q}~kuR&e882{6=SBy{f+5Lj zn@fQzZJXQ`9-coDX)r~;wMzG7QpnlOP8yra7_=Ij@OHFsdnIJ$PBG{B+g>;kzMq%r zXy~}OVeUr{gpcdQ-|Xdx^`jt*-zn^r@SP;Flg4zm7lAe>cRnx$$jQS5?R__ zGnySaVaZs-*G7+<_S&~2LX;2}vQzGgvyj_n)7me+^aH)gVSIWYWsGjIMqmeK3c|&Fab?2n=sTh^5HUy?;pB$I#H0K@7Ax|mU0yDKEqJv>dtah8;m5x z7ix>YF6#Xi1pNZ?OF`yv_V{zu;HTW9S4)}N3B27`+1M%vDG;}pF#w7PK5`+Uvbz?(W!!e2e)xkKrgXN0S z;T{U*JE2*o8^r2vf8cG!@6#T>qYXFZA&Cx`tRDCjL>mozSTz4-pj>P zbGN0}hVi;Hr!lAGdsF2RBl*ufL6LTUM%^xt#-UF1u9oPt)`s+U!-f5&F%_+I#qUQL zYx@vd!<|Q@4?-c1oa@ATicyn9t9lVlM+}vJbwfCh9yH$aI1pX7QosYvz}>ICBb$FC z8@POc2k?IG&4aCjy%2b|NlaPHitLA<`H}EA1*(K2k8rq|G1piZM=#ImO{XvKXOS z!Y;&?S*^iGj1l84=FmtaKY+X)#p&|x!i0D5q_}C%+;%ixRyPcPcKtEKlwEfwTB%qm zPbOoQ!QUIXNw%fj#D|7R4deFV0b4pGAMu!&f&=sn21cQsm-g;Rmww4@3f>osnAN&_OI$ zJzF$k1-ui3r1BU{$Q^*Ou$nU;yl@O@f7bcXEGmvV8)Z-`)U>g8s}QPnA>Qdn){ zRVM?r4NUL zGt@ce>`Sv2$z#Nw{zIra6c;E$Xo#7Mv!-v8$4ux9S5pgDa_NF&DPW1PL+JLEQ0y(2 z!bT3*yxi$4mwn$MN73nV_2^$Gx`pEN^9?Fe`QQjGeSY5|EF=_7%yf>5x9SlHj5Fy( zjFwJ5;6D;1wiG+ zr;Q|&e^F`N+vIfH+;s-i(NddMJJt z@#}$q+5GJgzT?GxST3#&*sPdvsdaAc&d^L=gY;`jalV0~Uuy&HapL2&G5>pQ#_^E= zaPa7#ucwb+lN%?j>voB4lS_mY6Bh?(?Jhz%K1Xm1xS-(ow0rxH3z7ysnHm+PX9EKS z$G`C$EJIxmoQL*lCvDEy?#pzmEqt0cH>>ffdk+wQCi|4W9Zqauzic16kx-&}PTq^CF7)(xupKAr% zcPn^RIqbYlu$si>2=&vr-9BvIN5q+C%g^eG%c0PJS;kO4*W__Q%y|g^9L&~_?eRpq z&r(F3^6kFxHwRg(LZ#+Zy^MT~JgqX5-lLxfoz6xi^za%y&^z{)#?_Ew{Rs5~;mvaB zKBY6#OIdcLUvSa!ja?nXYqh%|{;le7rUl%tWXd3v4XIHQ{SDRO>or&p?s)tYRp3@!@ zdEL><|G2DZ(j1V}x&d92BI}oaXZNaEsA5N>2*O5ZwJ*$rw|B^qdxuFqv&?EXTVXa-tgs~>1p`gBiZ>^{hldBr zdl|Cfuq|=CWSlHYVL`k5yN4n)iOuEZ@0255+6fg(!@|jaSoLNkK?Pjr=!2H6?1qTd zqiKBfk+j*Qc1ch+Tcbaxcf@|Dg(W#*^UR$51rWtoGJTb@6e@YCW=KqPbGu6nJRaAY zY+5o#68+AEvDR0EtUWZ(V9ykvay@lh4Mfb$XaN2pB5xi;Hi3{9=wCzHxUawd+;aNZ zlv*I=A*iAKuD+h{e&o@-%Tke5(@lL-q4s#Pw>WuDzG9+DjhxK5*ka1rXj-zlTosj! 
zm;mW;>uZc+UEt^X_MH2yPgO!pq(GYik_3uf$W3pBH8PkA?`~~o-(QNwVEL5im1049 zp;$z^>lyx1EM;ihia@Iv3Ky{sG>0rN6bqI>h@M(2kh9GfSyH(jMvI{!;qgL;lbAII z%n@nSYe50<`1O*Db*3sPbei*$$7WzauY)cBGXj!4lBNV)BQVPr$t#6?qoZe@rj zlQThe6BzgAbm+vPZfYVZ6Cfb=u2HVuclC0tmm|n5;$-rhFE?~qAbDYVPB0fFQ%R;@ zt80wTZjU3y-#*DQ3AuXOqk}V;{vYzx^)F#RJN18L|4zw-l%F5(1e3LCR_?D31g+II zn{2ib@bE0+>{eTy3N@Rm{HLqrx)Ujzc%b~Rk{AUa&y$avx_9HS2jheb#!6sf_m zVso&AD~-V+TSw&rQghxM!OFPCd&xT{b4xhCU(nbWOSLx#g8eqJ4C{xS;utT)9fHQ* z548~kqz|cmAgEEvQB3otj*B-tGK%~nTD2BcRK_wzZsn0Jf78C<*lmy_o|o4bNUZdA z$tTIh97s7F@581VYEt>QwPyPv0p;8^*zm@&14ZGXaN4rqQxTzfT6N1{==|B{_dDv_ z?&QD2l28NjC|+Y{!1W4u{iZCM_`*@2Y;2jg(FOmB>snzNGLBK|<8nKp+_!ui8fZ0= zgSjmFnX9UPUAyFI(aj2M>{WV5drSh2jHb%Rg6)acFYifYljk)L zS^g+`q1X!%1Qc=B-F&Qb)m_ecfS)M%g1_*D$0(EC9R&OB{nKM9DtpZ$@v($9|WCk%5fz=N~jUc^8q~;z;z&nPtWo$}o4%gY*#0!ZU&z=yjsEFlNLUu~;V>5MQX=kPOrt#A*xe4!`v_ zv0rwIvAJnGUS^y%Uw`=uR{M&p=zLp6le>EO%VRzD!To4_)$I!Sj`wRso?lD^{>|)+jZLEk#)u^WRI;zdAT$f0`WL%jFRJ-h9L)Y?eNE| z?FEV7JH4ShU4)&-p)ilI8x*7s@eomPLaA7bf5nXPTNh)lvRABSD#V04K3tbM*&*=* zC{CMCI9`=fq8~Jj#f=BX>^J%&HDSH!75kaW_6KYD zmKq|;S#3S?X*Rk;&RPBmaj2Tbcb!Tch~z>(H=y>-c9JAkNG=WkK>SZE0TNCv9Qdpn z>Hl7xI5iMbmX;3vNt2a2OIuDj$F^%cWF0!VE-j@r-XV5AeQKDlJ3kuAB_m?))t z;>v5*i*N;!cth0J?2d$F(&9l27iDt=myjJjZgHz$Gc?3@boQ*~>lNhV`6d>rDx3`y z>U{kYd$DmWk@4fzZ8KE%TC2RIZUc|jqvIZHEj!=+EwP@K9FcyT<28Lj+e4ki~2gcw8%3}u3gVm#!y`}bwDqfp(rntdO+qILe?8E4&rcF|2w2~d; znLATgJlt8TTkjd=|wjcYvPO!X=q zPW#8EE+$EZ#tlQ%GrQec0^QhTJ;drjj?;(YwRd6g6*Ix|q1bE&98;+qM+Xgz)nlL$ z5|)d>0hIBE&Yuc^vNPPH?C!7*7%XyS=jiAnij6gd>63U2nTH17(WEat(auK#9FqL4zC? z%My0!Kq7^YcF!&Ik~}YpR)x)_#wWc%uLD0PIRaE7qG>nH+xQ?rP;#YA`;leWC@B!g zN=^lpr9%^yuZ<6GPs;9%*(=|`Nv`k2Pk3N1%@Y5}h`jsETjs<80Y%pBAahS-n|zo& z^b3;{&_dTfz9gb6{uh`$r(8mb4v6%XzxZCnMd%kkA6ek?oX9mQQZ%_L9Ea08idbx8 zf2ITwo42>lF{rx;{c3o=ODs8U{JjEbbq1ea93Kfat@qsb}Ya5+;^m4=~nZ3#HUi zU;Jfo>E7d-y7ecqlf$6V>JMXMV~3nVMZl7jVyjqUY)htN!I{7$zfYkQb@5$xrK`s3 zhF1IRtGc;6G%IsD=Wp(Kzg5JNqZTNg5{4oj>6Z`78nd`ds-R6ztVQ*0FpZzRkaj1T zTB2oXpR~lvXpFGO73HhNx$V!`I}VrgR7ST@9W7}YZX8k)T7jKWZGyBboXIKiSluVG zYL+zd6RMiN5j5#=$_8%t$6F9KqQ6iNOo|Ha84zX?d3ac_xas#7;GJ0vs+){<%`AoI zq6Vc%JK@pT9PS9`XoDeBeX^!R`cJlx_|iC~rEVcC@DV8mvgueW4k&c#adVEMBI`** zhJ*muHb7gLq!UY`4>1YChxfF^D4i! 
zD{3+SCQh;EUDAl_#u5XBgm;)c#$%n$cA6X1pvHa9f_HMV{papNv(0JLl?REB?YdT; znWB77k|Yv|C;+q2xoNoWh)G(Z#@Wvg;??*d+Khr(3riX?So5^^QKTb`tu|=XR}!tz z6GcM7SrOYwj|1Eq?#1UYn^RgN`G4>^82gsG+#F|pX|bp ziYV#)9v{$}i~#~!JcX@H$^AdwIOz@U=7Wo=*SQs=J*(V*+Ytt1H-zoc#%NL)R@TlGXH(M#%;;i8iI)teM8O^h%H| zCQL|7dVE3EPa!&KOjH>l7f40>C(eXV!~r#lC6YatbO|pwG*G0EY>^HoE&MDxfHu|FxroDcugnD*N4$-w{|;d+bKV;$cpr|=ai1TZ4Yw~8R#NgU#3L< z{ka|-+fO9)v2nStaG?-Q!E~1F=jG{`H?%G*yKN7R+C`3uQ|!il-HA_2!}g6EMMRU6 zU_{s zgT6;?)?JcBlJ#AmYcD4vgvhRWI@9iOqAdU*<505K#Mw0{%9`GTV7nUu9-qqG#I|Z; ztn+8$_%JeH2FUak1hhB4scr;=oQj*J?@zs@zVNFB-u9*GWiG#fNl9gE5KU|vY0`x6!txvaPA-376E^QEkde0>%)Pf!3cz)j(;3ep*+^9IPb3-wbeGO(g=QOCj` zxi|<6PJgaj)IYZTU7E*Xee3IFL+x=ye*oI?YOjrs`JpU?mV{EIe!=W2-_abkEJ{)h zP)O^23sMF{XnyZP&_p^W)ZLn%9;lRmU1&9E1TN5OnkV#Fgf(U~>eQiD2vXde zRFgWr$?rfNDsad)z>ZtfMq#EB3B_hmRzg-?yfkLCo**e%F16m;93=SeSXxivSK&(2z7L!ED$U=2kMow&W3ik26JXZe zh7vh_#}NTA;D%1YqLWmqjP=Eat6wB48rnC86eIT&=es6l1X?5YQFjp=VnU?el4D0n17PR8o~HY3!ZP`OTf za3jZ|i?1BE09CxQh!ogjozvd-RU?wJ-o8Odx~s`|y=&iXg1A^L z;F~=-RsNJf8jWXrDaHOvUMn6lM;hiRhno%9C8(k z7b6T=+{-Xeo2e?mF4~cXC0J^8$OGg#{mg1lC%$ivVG3UGokWccpz`W(ei7}b0@zLe zYYcC|2vvPV@b!9mfzj%bKopK=2XiGT$%&h%=aoxO?kM@x!2|8#&3AMUQn!xVjd%J( z3NK+na5~QIy9b1yJf{AK*qy{^Y)&_l>YYKdAhJ%~g8Z~oE-FbP^tZXR5w$xD`{mHx zrAOF*koX`uK}^Pbh0McK(49PW<4EoH3+j*U`|L9t=+Dido!ervZz3Ol^Fhp&Q>2)T z#L=VsOXHa+!IrUCj=z4(1L>DGw4zK|fljuc z>FZB7D$sJ9Efj6LS)dd;y(Alkam`p}vCAOKTS}>8g3o!Xc&Jc7VJ_vEFS1{aUL^E1 zkZ%|0C#v4BH(!b*lffEVnj;;CR%`xIES2?-6wR|`wW|WWu6<4D&*+oVLj^!Gc^ikB z6dQ*Fd>C+C3-f=o@3^2m7G^48p%GG3aA36gUkkN&4r?SONL8-r@b?$+#ruc z>Li0t^HmVb!~Up?`CXjJbrHl!;exwtj?h0HkxG1_nM*pf>;%4AdiHy79h%%H@uuG5craE3~i!u}ysG9FVI4A;(u39CK@QggdxFPJd{9%74h@r^0Y9 zLKgcsn3-m!e+(e)C7%M!RNKHh9vge}+oPtC4k!o@PFK4US?%+6pI9M~44J)#jgLoD zIeegfdQ(D3p81F$vaQZKk&d%@m4&BD2-6NT>9>4pSenUc4mT)nSKo^VfeQVL5Eex)@obG&!QnQ z2s2qsD<`}&=}(Bm6hEoGZ8ru6aSHg1uO<%m)(TbKM6)~^_m zIuuXM{ZU$lTpw=lkr#NmD{TTUDt}3!J#QWf<7`_qxFwowI9~dm1f&xdz51+gh8YwE zh&K*4x@>*dAI-u20mJrD0poEy=kw6&J9agS_aUX@=+J6X-)MSAX}Jx~2nfr3YVig+ zXYU1^kZwsU2k+^2(&D6L5D+jtgWi*4){+d?;$YVh>l(sO*L3)uvJ|3!HfR z=p}tz#7Tr2mdv`i-&7VaMi-ub2KBrOKPi4MEgDmxsl}ND)NmVfri3WS$}9zcBH^9T z9o?44^6ues|0as67fVo+$z8e7c+AJb6K=PNfE&ERYltUzGNH`v^ne^+no0YaI#Kku zL7&UPC_m@@sKqak$_|_s*&^dikY-ro*1MA~iD2EDEFK8_A(7l7i^mnNASS!sbAfI$ zP__X8%TP?}Z=L)<)-W6ne=thpvyNcIU%+Cpf4b5`ZQ`+E>a26iv3Go!{r<@;sobQj z__o>BREjaC`{@6MOHAwkc)F42kvZ_wYhc$6vf6()!Uz=3LBENKRiLqys4ZU+B~o}# zv&!(cfF75bOadbjA*Un7{1(I4m!2p?Z)FtqcK6)68`O;r+_?Dz9v_`1WIyi!T7;I8 z(fH~*Z{FkcottL)GORIEjy5$V<_OP9IkLv-&H9Mr^?6a#{2)I3*{7;*!@@7mQe;KF zpL5jmkTP-vx6wH05Z2fZ)rlk9@igM-VZ0xG##_H}qYN0__wVng4H4ADIp8g$#HfAQ ze7{wijEa?QGN7*9fm(*NL|~hzDS;c0Kbl`!F=a^C-K8Q|#+y%ep~aj%9JE+7 zEiSubFx7(AW*Zc~vr(zl)tz}*=WhFxrTKKXFvo%1IpDKxm^$mnm3+Ih=*qucqcSQN z!9P<6Z$Dj>YI(j=_`AL|CuI7FMfxk@anWB9v_C1zS4c7|-tO6w1mbHW8CrVQ8a_D; zUCKj6Wq)*Ie=kz1+cY;JkHK)j4Lwn;IWY)6ksn5Ax^*Bvj4;@7Bd+$oOcOU3ld^Qi ziUdZ|EYzNi@F4AUNp}Wl2wcBqRJm&KP`*3TE;F%XF&h3XX(_=&rI4E~m@?y~&=Om% zI*_7h8fN2H^6N)Z+O5G7Q)@`wkua_8VHr%F{I6j?=88$Agy?reSw^d$Ra><_^>z~M z2W{(*MB?IN`AYQqVHmp4)OZx(-J3Vx{o*p?t^C=lp7Q8ti!PS3_ysq;flVo2m55soqQtL2YIx9JyLEtnvE(Rk5IT=wC0d z7)6&(ifCK9p|by+MrJp0Yr53_?ZM?jmL{!9wW<{0S94AZ+m+8oiS)zpIX)Qr(ND*v z;t(F3jCi~^*UF9+9p5AMcq528Tc#RtMLkEkQ^%v3<(uxwZtBYA?3m4tNy4B9YR8&{ zT4^^6N|#Z`I(2};^c7dseG2yZ;SY)m6_xlS^}HPE1NfQWjP{ZMLPoZs#O}8=h#J!h zR75HY1h#uh1h&*VzGXBPsPH>?79uSQ4_-;Q)Jh$qNdxJEd1VKY#($2*RS@rHADTj+ zqDqOZ_GZmuMv;;!YJ+1h(cxnCkmNC}7gmr@?Upoxdk6(5DJ}EPZ-Dz7o}#*Yr9)%D{inE6Xjx^Y}54a^1#1r882gFC>h=ACvzf3}6PSIasXr-r&YT2V~3@ z`N!k(KeDynt--+_a(yaKf?Rkp#{w!ukE0lII!L5aNlIn?7hRDPX_Jj9Yk3%%c``=M 
z-);!oj>)85$ca(skw6cxEZvSD`h>YNBU!IikjmhhC8u)2-ksuASRPWt^=K$!zI>zgjIBLoKEC z-C};M(Ae~7ZK!g$vDII;WcUv{g>nDA@sN;Ls6m?{_Am567Ro6iRe~H|=krg>DX2+V z*pn9E-r!r%rt#gxdG$pDF(5mX!7nIKvPa5N-%!!{wZLJG`Q>Yz_^HN|+`g>}NNE<9 zfme(Nn|;*tNkx(f0m-b*9$9GB#%Yf|=MC25Xhz69%xcU?flub6X%k4?lb-gaG@L(( z8SoeT=4aH!jhy@zdSStZ|C?V$5dmYmgFc@Ai|(gQ^Q2Jm_oBgQJ>GfzM0)wd@e@3U zdM_MPgk9tC(W0=vjh{bF5*e20-Ni2ZyaYSU^NCuAJ1@m3HZC9q$yGQV?2rLW(eKDE zT84shM7>G>q)@*z>K)9AhMfWleWWoR z(}aAHIw>)9Tsyy|^lwbnLheBq;;MC-5@zUoUo)%ciXzU(b?$f*lev zTiy71ytV-X-6v`>X^|vw(QXsgFC#Lj(e^YpKw*QHT#yFN3YXm_N+3$n&Y*`n94W~ABZ6mcl^fhaShI1v7$J=p|Al`040RZmwu~6hdKFPeb|O3m~MnPLnaW*U+&Ln1DzXE zd)HEyX3$HrpRti)`Rv;FQ46NT?N#Rv`LELYhY0qRkUEje41UL$-|?9>noPfX3Y86X z!#5qyAQRBp9aUx`pG(lq7spTR&#hTezFgrB? z{H4yj2@93WKk$vJ{M3n#ybd_wZStY*Y-DE-pp2i}9%}DT%gY3eC0EC58t~*-5+M4ta~Lwr#hdbAQ&naTAf&aCVL z|4UUXv+rNuuNV!u!U_IE_f#-1u>y4?62#Plx;a9us-0aRo&-J>FT*~jd)g8cDGH~P z#Gl0?LN=7dp5Wyv8Xy3SmXKCumjIefa=p0;>It-CO>Y5l0C_oyPhc(uc$5CmPKvaS zKoPyTbbW*>i(-c>E4%ZM83G36!0o_%7|Q)0Lk&ilF-SprPiGS;2sP7WCvqwt4DV96 zE&x-b-2-rDli7{wuK%=YqoUPu9uHr6eQ@Sxv4|*Ckq0gtF`M|rxQdHd&XpSs zI6GF)ke|q~0GCE@T8e{W3ztkY3kIhxiZ`w`WDwILhKlVwJ}OTezzJpX zvsP{Ydl6{>=3KcZbFBnr2hdH4d>%_8C;xbj`&BfM(b9Imwun);SDZvLwO`Y0+kyR? zGTiwaufL5|@pZY-m+u$*GOfT6@}x$PUsXJ#DOu1k&o3IG+48cMO2>Pmi)_+ouuVuJ z?R8g-NpptsCl?tca>%XWV1?zdKNdQR{l>aX3#LmJ<^t*-=61l4tiXIjn^MeF^5s+F zb2N*|x0xQ_9ju=sNxXA_!mqnybl|{z!Nl}!Ah61|jK47J#o}@8($)Gw<@VKec6`Ve z6cmJ*uEuzJ126ChamI>zVuTsKGdFDnKa%@0un`Y1zYl;w2YJZ2Af}%IyeB40HT`pc zJYU0>L({UOdV}pdd)y_mAgpowQmU$hyv!!k@k0wFUCA^_?d<)&I@s;hj%WCtC!&=( z%*!JPQKUfyFIATHKEgoPr^)$n;AXlU=cN1I%BJ0iq%;ze>W@kqSRkbx@kSsqC8T$b@@~dhx!n<1{EnE=i4Avn_Ga}nfmCpLy!N|9Y_lUJp?e>8F8fY0 zTlDJs^yYnJf6tHmFZ^G3C(3lkt)6nbca2n={6uzMHDsy(Ry{F5I9!$ZyzK|R8tj(d zc%EC{K0XuW3WnbgP$8K#`f*>}F}D~mLl)t{ar{>sex2|K6%RW;<3Jtzt$E`fi@W<4 zwnFuovgZ^S9o$;|f%JDd-y}MYvt3^X-JPBwU+n8VwOMS7n|pjva#*1PKNU;zE+Afn z;;1fUl^DTfSh%jJnm-s{+2-Q&$T3BxO+-;s=8Vk(;qYGIqo{)47SJ*X{d5`yEL7-Y znk<{~)m4X*YMT zuQY&RhQQK%>x*VHOlp_?&6rZ|1-XYk-2YG;qkQ!F17^Ey6d))`{sgBYllX4(B#ORT3G&kxP16Bs&tpl zwC^_aW<}jTE`+T=pC&0-(O5yPXGxc;@;Cd)r<$)aC9NjjfMQAT__a|tn&;iQ&Up9F z=v(UD3LTNe=3aO}y_W{uRr1tHxdI^qFEaOSIe!d7iHKIt2A~l~IlzSMaN+!8_3iTf z)A8nB{8t1kw$;fsZ{&{geONN5#y8i6d`cLUDw)mp90QKzH8oYo&zvuFlP57Re&8j} z;0o%CjAmC9(+2vkkp5jZ6{)S#^Pb{tvvo;vlvq`^H6L<^uSM3y!I|%h)4kCssPxB* zj#M8KpqFmXHuOXajJ+{C;nm4*(pJ4#m)v8g*|RqlN#uwsu40hr9&G8t)A)^s6&QBK zjOZg1w^GrBX7vQSW>4RJ7IHWW4m(pTTC`EZFc{P3>*@D%BULXgjxYA&LM`LRh$F@W zu3v&MH+4`VIYc-!8jf~%yuI|Ql0Da#O+ZbKE{ zx3*j9PH9298)@l~mX1YtcL+$AbeFVJ(y;*Pu0=OUcL>tm=VkkS|2g}=I(J-fv*tUW z8qXMG8e)ytjBPo}r0_O=3%C2qMkB_oZH(OJb&W2*dmR_$$>Gjhv1rR!6aV>|H9Cmg z#Y-B|@jJA#y>zn-yaKpTL(az)BMYdZNNo?#re=+6zHoUZ6q443%f;Bh>+Ed!q;^}=r$U#04N8~nhIIFM&igje> z;`11Kr}rwJiCW1x=iWHkFah_qCAsOLP&Hcnk(LqD7kQ|dV{T1jp$P)3bMEcm+EI+kQs|k>v>71l?P=h5DK}1DDkjh3?NsLe=U7 z!oo89wkN+ei`2@RBxvxfh=s|a*_|3hlrFddsB7VK)c=Z)XhcpMC48?>$71`_R%}KK zuHG7lC~sWOOxs>u2X4h!OIP!Fu)u(PmD1~7j?WdLn3$O9rhpy<36#AeUU>C)5?e?_ z=lc)hkS0=pk_?bYn=rt>PW95}h+ugewF-SgyJk@0v|vCSE+JO9a0r^oU7%(ub(qz=k(OsLIV@rB!-ynyJ03?^(R@Uh+SbmEAK9wj zy?P0^fl9oj2veTr1+eL57QD!KpzBjI|56_w2M{tPM=?b0!Af{|pzo}hU2lY=! 
zAXs_4VYkaS#AD90ZHhOdbi92qo9D%T5ep*wswuH&Q)soz2y~i$FItbxZZ$oN=Zr1l zn@o_*_u$*LAKqHnvQPV`ncJ5+W^mqIUq#VmYq@r8qzj6wUB~(@JqnPV;d5q{W}gGT^-o*N^4Y^brT=AA=F}Qo9hX%oP&#e&d~Gv^hlbu7 zq2bg1l6One+mU#F@S^^6_=f?`bbflWHXxYU=8R)eL5x3qqK7|>kS;%$_T$}kDx)@% zLdj1qe8LAQU_%uVeNKILh31uOc$JGy2nT6d8^3L(I^%Y06Ibq($lc496zHqEZ4Kd; z@q15&{4t7wx?SY7#Y|)&q?VsKxQyCwxTiQ$Dq4L9l7zCHAzNofM9D0W(nCu<<)6}% zN8uT?JZx9?+SFj3vX!q-IF^Z&a*Y7LJo z@TFZL1pIMUrX2q6cdzorWAt>~jZQBlBZ4SE($1YYeG;&=NjwhJ=J~7x0PI$d^P z3+~_~QTpS?dY)F!lG{Uv6F|Kz(@hfPb^v^y!P9_62D{fEd5M-VrQuSXcq%IuZ`}&s zLG@z)B{*kB>UV?xpxw3CCq1trWE7p@{xk_w1FsTOUU3;G0%4<-S~OoW(GC0MP8N?T zYNnf(@Lqes{q*>Km&0OmWt&Vt$T=k696S}Mo?gi6q__*3y7OOBjNDqT<{>P!|O{fqi4q{4G{f85u_s~ygoR%#C$>m;d^wc3ULzBG7;4yw(Uh!fO0wK(c zjDs~zp0cJjJOT20h5m7RUDa{p1Zk*D z!)VfaMw{*MGs!qtlE3*XH#g3ggj~y29RPn4jbQ_O5+ia#dV#T;ok4T*M@-jRRH^eJ zZiX1d7e6#HfS-GZ36@r!33%tXxFnc1Wh!-r`sa~31Ep4)fDktTWo~TkVP=ZgYnw{H z-N!Dz3#ZIrn^BF0WoG~UAMC@m8*o#NLP~~6EXLaRe9x`;~J>89M$m4(Ea6~jdh(#Peo>3uyqJ$GEfXt$By zJ;wJR5G=p;VGfs)B_Kk%Yq!`vW!8GWmd*R6AvO!_ciAp~#P@k`O~^uWfk`w?L$^rb zLR6%Z-i~^!4R|bbZK$N2eqj$7cG7J6)&`>^`|c+5r6U9EXbHO8)&RRFyVl|VMOXX) zyFXJB@OXbk=2kNOmCcOEdbYyE4 zB=0HcAlh7q0IT9;Ggg?wX0{BHK#d}s%n(8$xinM)bWoS4zh$cbCj`;?Z~mORxyOM} zsiiXR%lYeG)VX$iJ;Z`8oE*ClWA&K#NTApw;jV8LFsBTYwejL;`Qp)^NA{7lc{vF6 zi*kg0V!nO_1a4zJGT4+>XZeUoXrhI-X(9&ei~9mEo2J@ivUMv{%y>F#>n!YW1>;o+ zR*h||W;JS4L?QeOoKA+PfT|+GnQR%RbW=Gn&SsQ%Od1xp!zk} zAO@jO;H^cUFu2Kc%oK1H<7E>nmhEDbq7@BwFB@s*>~qJBw zQ6O9pOilqC>xqd8Ep`XKPxkZl5jd6E0N(#)Avb_l^(WBQm=Qoj!%$uC*bh@hpT_-T z{?PbR!(_XS&+kUG8kMXgBB$x>TG zYJiCZ#Q0)jSp2&JpnvJ82Wg_kMi#!dXPO>j*mW(%7a!mW_(fD%8O$@?CC(ZjMc>O-h6(lTD@5Dq(hh z0E=R~ggb8$lq4;%`#n-2h4+@QKjSN90Pj@A7y~3<&B6!6taG3{jMkn;z{mk3&CA-= zCdEG4(kG+G!b<^S$d~x)dGm(z2cy1(?=M!Wc_~lf z4~q+Wy_VhT$4?}!wK=9j+oK5>dTnZRy7X3t#DDpjrvHZSXhS=i>%)BTgRZ;e>;=e$ zvU5lmIBg=jLot9_gp5ieh-UyO9GoGyWl_Kw^z$H^XY}s7y*!;q*u|F2QTy{#_rvY^ zX+No-^zf^=GL-^Z4FwWTOHAL}ZT#>SkJAmyxe5{n%?hUYW(I(+S0T1=`uRWF8>>)a zrV*7|^9geOPo1_QnUt!07nfIy^$y#)XyD;4(Y3vK6Vpw=0_o0kV(eG#kz(nmF1<+n8r4-6-^74EM_A)0MnK51ot_BR zi1`}E0k;+2Phk~x_I3A1)n3n4dcA7Y>q>x_PSM%+nUXxxNAv2(&3`n><{to(1l-N- zz04rF*zx;-D(lK2s;^mZ-dSX}&bulemxwbsJVxiuLY19TUVB$FIV>v%(wnWvZUbTu ziCczk>H%MUCV?rq^KNSvgYVuqE%scnDB5<$!K=)vV|sm~BYulz)KjmKK(6g_7R;Ti z8Pi;(N#$yIfayU>$L7Bx$qMKO#VI#DVVi#bF`hgBao}%H2WrdLb;6$CYs(XXRn~0S zzibmZzZy|5)oyj=;(j^{wI;IoU_O>bB2J5;I#VQb2@rlM?Ox0l$6fqZ6b`u8cO_{K z>1COTIE|`uX*i{Z968%nGByL3)W`UkwE**^W5A%t>Fnb;Q|;yW@h`AnD;Qid zcjXOXKJSkDp&IT_eiGntmTpU>_N~6@-kNCk3-_0ike^6~7{HfX@7MG*&#qf(zATd{ z{{@G2?Rp=Ztgh>s!WvWLkkf3Wt@#DGR~k@UT{8B3Xz#oyxOvFoJ!I7PXf# zS4qjC;$he)=U+?sjzTiMHVl82^b{HziCkmQ{h?R;wxQ5sHCcY2(hJ&bQBk#pzD$tw7K8;!8?=CpN09z<1q6bMcX=#H9;X~ywWQZeLqs*vQJtNHz3B*oeFr&Y+^RKUSWT@Oiu4)`@lc2 zSsYweKjFa6VGH{LMPGi@`D7b1cHnI<4_yB!v{SO!zrzr4{bjp@3f4yiNYn5#FQ;s8 z5Tq4w(tcn)rj8l({dzKio!kjsjm=$}*PMjB_dLZ3+R>{67n4j$DRdX-{g_VRqhG|!02D>RIFVecGm0-g&PtH6z)$Z!IFtx zr?7AOd{1HD33a8mAHykVRYS`#a$mIu_%KStmTTqXM{bm_dnzh_!6BXOeJ!nd`Wn&i z^bxiqMWiDFpOjq;bPTuH>kBP9B+QBSplT`-=O~gSLT2NH&+|fZZJ4jTT1w`@25VI4 zhOc)g^QhHgf)Q%~j|r2Fo^b5IP(dwg8DJLDR&r5%`#+I^K(c{$$WBmSkaE@_W1<_-H(3$IebySWrqt9=YKn zaDQ^ISD~w+KdAVT>J#oir2c|8=Azb8xR*Q#1R|;s6Y%XqkOYB z)gf_`o9hF4{RCu|D^cEr#ry)O<~N1N<~Oi`wR>N#iN5%;J~6^)`Ch642pnCS?3*RK zI0k#pUHGUsVZcQ9rrL?tyzhfYT>n4R{>xf-+=6wdogPAY0#1E;ZHniubHx+8*iYDL z+H1q%er#?%YU9VB1c~F7n=p<7D5$sUO0;KXNPEk5s?pmYZOi$3ILc=o0ip1Cv4O?Q zhN+MO2$VuG9BiN%K(pynWdpha^h3Vio^Tamo(+MQH0`n?lTTY{-A9f2-{>|!#Yq=xPo4040-(g`YJD@!SS#bSSM91 zKjBv&o0Nl#(>$kh$!)sMIvaeRzf@-j@(GLG-~IG4H5=|Zv7~tgYPT761XkScMGg&{f)E9rXYL7PB{H%6lvv5UzIVrPF7} 
zI0u=*T%8HoVQem4`m{G{9C~yK)kXFNEjUsiqvIq^Yb=mfC`{)6V4ux|W==jkWG+(S zvWWqH3LN%B7F?!qu<*H(p?F2QtsE#6GA)d=2->7x7#QNET?*1Ma0dMWK`7vLo5xA`5S(P{f)7@H*nN@XXdpP~ zu4fP#dBVcB`D`XCz7HeBAzR-`(IHFWX|}WtpuZaB&l}AVOI6DIW#J z4r69)vOKom(S_$Qv7lb^7TtiM37Ofx+MZC+d+^S%wF$Z^cr!G{WTnT`g9O(Mo4-@@ zeu&?4x+MMts=A8SJ`W%xyorog=tBa5t2~lV0HU83wdLf>=`Q+!KE)vhu=2&^BAPXg z;rXY1(uA^?G64!kO`c3&er84&juxcu=`R=Yj+=;L*4^~ViT`Tc4{U*AFujp%s1 zx9OT=`a<#2sx)#V-sW-~9xn#mwQkV=V!NH#lVX*k(jXd;?O&O-v^haJyj>D=4D^6T z+HdX9j>fH3kN+(?WjE<%DX?xtlB1>Cl`Dxqq1;43K8DbgLgzM`3?cFrD|mWB3g+Kj zPgYV?+LuQJ=9K_Km+qWei(dRT8oWuTU7we+<@z$MM(2-Ra2X!6_S)V=UI`no7(_OO ziSVcGeBk)F(hPqlE?{gjDyH)6+;Ul!07(0bLy7O7RzhzRclhc|+K-lq)NRz&nWwUX znfM{!gN6W2ap_NGIjS@X@=4JE3OLi6W)xsbPqHh^5>q4tcmrj*6Rg$Tbq}3QeC(xv z83c%*xvl8E>Ji4W`ZUFbif%M0)x*tHJWE3rIg+Tc8>i)QBa$P6nK(+Nk30i03}#!S zSf9BVmkrW99F%Bo1kpeAzWZuctN9*pX0!Z&xlV87Hup;zv8VGl@#7e5ReNiKwEo|% zLP@2%cy`S`Hf&Hf`T)CD&rKQ^f0r1r$rXp>vY&RB75u8ULU8eLB^<{s7)<3TZ1&wI zGLK3NkivLNXvTyQpOo%%k82tA$$mj5eti3PFjLbQmvC7psSS70it~&9-0h&s&WxwN?Dng*+bIlfqwUFR(~Alfu(ymF?n2T#0~QVEQV+nH zbkHyqxIlki4Rt39d7a6R_xKDMS_Sv%rr3hw=mtWsiFGoLUW!S5UDis%=*tk|@$%8J z!RVRu;p#dmvQ4e)KbTKss>i4~G}b0+5C{NNLA(5{e@3Z2q2=nh&X2V}d()3FQUNEE zAdi2f=Syp8)%l)AUW-Uf*FVb_Ji+`ZlUthKLY6Zsb@4)GAK1hHxGk=u{c5l%r>Zd7 zWZN3k)}qMsrzS7;O*SvmeLyL7-3dhV(lEL=`YrVd*t>{)^NaK306j0CEZ(@Rvi3D> z$%svSQ$g80_GK4iJcTR+0$BiCPVH)pTwiPwl=!7q+0R1U0gj$Loi4IQQDb7rLil`r zyniTH{k-*2M(4hcFdCKqos6k-K#;}t+V03CMyef&m-BX`9FTGr;w=6(r|bWY^tp#W z&edPPTz-bdp7oSiZT&(O@Z`bheR^|us3VG!5*{FVywighO&j-puVf2iYBc=bW+qAY zcimvI*>nUf^gAY{n?R}#Rw#_-KMZ5A#UM#Ki%~m9RG0h~^ zZ7o*w;)Z4gB*C*fz(`KocDwV&{LPDEYyEJcPF(BjWh7ROF&Yx^&oGwugB#_c$$(X< zqR%ISpQI1D*7cq3Ga9(qup6eCNRTH{{+nWZj7!}{;d4Wm{Duq1Z=j)EhG#i z6+lNFKutPqlZiylmVoEOy}^+%1t%v-)>By6$8F`NFnUR=tKl^g+N5-+cqeIadP)1r zS$u5E<)wCsy0Y%i12DbO-sK{kjcay+7-f3%adCdDiWaG_r5?c`lkk*qu{wPaVNhR*S^FTsHMlr^7?|6*eNBrndK4I@S-9Io9MGpV{w(++ zo*2)pz;=q%CQ$^T&p!#mdhNv$bWOPHD*c@ysX04*v2n ze&hQ{>a`0+AXyi(o?|*CPDHKtyEOtDI)Wn1KT!I0K0Sl9GAs_0RyBD9NiQ_RH&-=G zOej(cg?viM--2comQEen;94OcapQa`tyoR(|1Z zaRm-Pru`Y~oPF}rwb|p$CcFHM18qmggQH#}V8sZmuU->zfL-9^lO1Y3 zaLLvfDZ{p3b>;R(?o9IV50kSvgo`<*u=P_f5v7QEI98FR!^HVAzcS*P7I&PvLuAnb zHE>1Y*kGBr^4`=!qnh4cTzQx|Kf#(#h+l=0`YO%zzK!GZxpVb?yqxRGHO%f7Dp4=} z!eukdTeevt1`l2;$ltv`ox|Xn(YL(kh?8vo%=a7YLCE|1{?p-H-N5CWxzw_nq-fUC zVjpD~P{wmT;i?}y2EC>V=g8c=ETAdssc{f=s86fE)Oas^_;Fe^pt6f>YvVX)Tx)7jj8 zc?hb;Ks<)P5hh#K&7B0NmC+-!Mrq72hb|n=MOz0IkJtqJL_!9;GZ)QcQn=*iPPFn5 zBNDw7;o%f&$;;7bqSwRNScKt_BE%#Xl5iq7n{fPbG^Kh`y_}z6X2k*Z`YO0{Hlr$v z2GTkFpO68)vDALD8WION9>$8GZesI0Fu3lfo^NBh{EVb)N?gAa?UZYzS-ofVd7X<`*rn$yJ|1?3qY1%V zBJwhGAJaL##a+48?tna+(%X@P`ktyI*N2dr7E`?GY^$ixhl?mGdGF) zBREl#?FkP$_cQ7v$d7bia{ktC^g);n%gw=($^t}Eyxfnn+A%TpOgNNFX~HI1w#uOu zCafOF@{6<{ZJdBdj|R4QL(y+G|WG;IG-UeV_~~UvcWcztJMTkvuZ(C zSKxjlTYZg=yscbIO3f>$hJm%pjsAMacd90MrYvFxlcIwOoQ{j5v%*XYvB|-jbNv7X z56)Bu`kYHzvQqI-bYAzP%#mnB42*uDQJpAJXPmBN?NEY>TtL1!yaL{!HXcl(=XKqS zD`!wyVQqg11mKWGW_L{}7|@Z*qhmRIq0u(aJv)~faYWtmk)wRE!&zKO2c}J5!IP+ zo`9p^a+JqykGkmk12V*zLYWSjUyxYYznqZ`uXYdg7A=4-n-9y;-&#mvv_6Ku|=W2#c`^Qd-o+nyzZHqJ4X9*5Ry$~!lr639En zvZWTBQl3Z4>=56q(2ZGh5?oK`Kp}*Oom9o@`|(eFas(4>h6}$~gR#(p*cD@#w696H zk8~Mn3!3uogn|~--w>SJN-{AkD`!n&O?>6u;*yZ+)8_i_S^ldQGT8NtS;678(vpeB zC37dGA2j%%(gEq}l}zs4^r87CMrZ@^yAKdP?cN7zuo=a^zx%E_}+Ir z#Dikv62Go&jJ9b99fQwm(D5fnxDR-r75994^RwE7C>AvVWRkXp$gx19!a+@y5gshB zedEeR%LfJefkCl5-41DV#Ut;8H{IPLR}=_CG{k;LCaXr#!@@@WZj36YZ9^XBrhr)r z-IE)P0iT{MTE?j64{C(wf7hcSQguU3y9EzrkzC`;d$WcuyOcx#VQ|v1uQT+60weZN zkPT^sMMwAMN=7Qxl{6h&H42KN#tJ|2LeO%{PZn${*8>oAU8oI94Tc2Z!A!WP%bb1< 
zzu$O?w*wZg23V^<+n648v80gMyEx>QE^{o6%6rqiGFZ$0{3(9tVZY__$*-TXlrL83 zk`6KxRxU#NDf7J>y4(&6utiZI?fF&me!H8#NdL!|a*EQHjgtBqvDUBTI`z5abz&f$ ziV#Ch$9&Eqjp&Sx%15t)1`V=EhU-QGx;l`qzSi!gq2mlcofcGPs+!Yxfxn?Lj$$o_IMQA7M?A29*gotm%%Lu34niBkTwp&2o5k8)Re~ z^1Y(nkx;b7KcG)6uV@ne&DNEFIL5?Fp|e}w){b`S3&MjR-arONN4oA;6+8J*zKO3m z1f~oki(_wZGLW^C8F})YKyv(<6pzvhSazD? zlffVAGQMjhf1c#aOv_@?#d+HuuNpd1D&qu>x%@NEV=MMqt#j&LPUE0(#0O~?j?sva zXx29Hjw_v@M$v%x3kkv#_MHEjTz|Y#!RcVwH5ReyB4SWA3~AI$XQcY?&WP5;vkTSy zuKj(y%N^TUFzbrQY1{yopX*L!YACFcGT^;;z@}jov+0K-%4CEkx8&!I+XJqH{@ekN z0EdiS|EiOH+ptI%0n1H?OuC2fCg!CP8r8>{kad%mBIHk)f z;I=&iX*2p-*W z9$1FZo(s+VF+W&NEi@WirFp%Zk`j8? zeX~n#dbqshpd18uFUrybn4aOmmSa=zDmU`r92SB_GWlo``H5y-P>E(ND~jhgE?3uM zNoW|=obw{O40~NLTB&Id-`4+G^(WQvv8!KF7L~B1>O=cHKyx8+$Wim|WPkV+f+^+o z-B#YS<>FVfMlaI9F1yLPnr>UVt$HDKp5l*AOTRojwq%M^hcD?k;`5-%Nq z=D#}t)oSQ<1g_1KRcUhOp9)#&m=U3KvKLOcuJjBH5ubI!;b29M7tn~2^X{j3v)7~+Sq9qoOt zlF)%E{aTPY;}q7C!Ua(+rQ5|RAn{ynGxS!do*$% zaIRPECof5<9=_*K^?-L6`}w5z-6Hg<;e5w_NByu*(_a4zSBBqDaFSVl039L%CNhHtnu|2`CLydqA&9J&qcmFu(s{Bn&q{ce z^#jMao0;B0KmgO+1tWU-BOF8Jfaw{jPo_Vp`)?cz^`YYq8KUM%6qi$rxVcyJk2 z=dWE`7^hjTNDFiz2?$)zXlh*c^~^b0K9grmvBMIwJ#C+o)rVNGZ%rh6+~3y6ec~Fi zpCJ_X{xUgHaG2r};Ewa_Z0xPT&E&gD!_h8v)5F2Ul!^;F5f-e0^4!wdYD?R;)%|%W zdsSlGzcngibH3Y#0Uh~7u;coZGDe&_e+lQxlQ%=Iajv(6s3cq7YRdDMW1;)m-|@f= zNR7(b-6wKFTUF`M8RHWyH76_ecm+CJq5VO#p}4+sycYh*o9d|D;$HW>lT4QKdFIV} z;TJnjo!*50_*m68Z2Qk42-JMAh$gs|(GQr)SQE}E`|)eZXkEQ)JIinWG>PPY9PI7O z^NIXux!fQq7SyeFUx1w7f8_Vv)kgW=>8gt!%EIgoS23|43=Nv+vc2%d(dX3(lAG00 zE?(j%#t?1>xOQ{Vg|)fx$c1qQ6})z zj^7}*r`457kOWVzFO2b7ABR6C?@IQ28!ee%w=Y(it-?RJpT(WFysH#<;>XaLZilN! zi5nwb#!0UX4PbKNs_mb7nad9;=PL$8Qtb31sOm*-VA}4+%(uc2B=t3DoO`2tIO5Fc zz)=O%aqP@zGqQNq5ng0^fq~+fN_FJM_D8`TCy zQ@66IZ|M4;kIhQGN9l;*Rt(q>NSQM!(S-Wr_FUqUHPn}^NFw{T?#nf{8}*(3TWXFh zuq($5{+61|w`2zc111w-JGoUId92YrKR}Ts?0qT7F?rZ556*lG14@a|`f=-NUyihU z^AmdI^XPBofPk_MIMcKon61H+$>d1sA^GGbqAwxAa&gkZ{+`CN{qt_u7Av879_f3} z1!H{-OwL@zQ^c{C#K%Qx%N)?ADpRZ|VGJ@2DDu#a4teea)yhs`;Y^!2TW0QS09HchOj_yE>ou<63iHm|B^?asb!FUY{km=T5K6>Z_W28W3b!F@ttH|tZxemMQo>GQ03-B<5 zLPm*BO!dr7w%1W7;73~)@dfS87nwvF(023tpa3R!#{I^pn{I_Mt06%cgsZneM7I@# zBEleurU2npR8$nY18ir$q+wAh?}uBr|DM~xzLaFw^COfC-GQZi#4h}q;aFY8*ET09 zJTP6Y)jW=!HrE6f!>WVZKwoskci@xB^v>zB3wV0S^ol{XA>6ER^TU`zT=7m>qni2%~bn^+HnV)2*ODx3i1?-bumVK<$fH4JNh<>TiYhFmIa6ex0I9E6%3tAe;@p}rFdh03iks$`w1S@I# zcZW#I58O`%&kx{9A8lvFpSU+`b=TD<2k46-_K?GO`K(6=emQSGZ?G&CylQ05Cu?SD zVW&ll9MaKQcAN9a-V0r#{Wyl0yo9@OunLlnJg&b=+ImO;PD5YiluL7Rlu5s`ov!2h z)#@UBxrj#j=eltVO9M{(m0#C*>n868hlaYJ^zNPhoUmsK`6B=LnAx|TZ@RU!#bCZr zr^zF5vlkR0;3x0#4-_PvjYaJ>3d|O|>L~$C@W3gHo1X60v1V&D5JZJX;U1l=NPQc8 zZz8zql(A9%3I^0`;`h=f#s2G%*Dtkv&50(j96rnjFn3K;^oVn58ac=7zJ`w=^>Xay zG6KoLSJRDtSAt5Q(9(JxiHIhu@6DWKKY=C69^->t^XW*{KY;1l>UX8!e6z*8_89Qk zteu$`H7heS8qcz8;&OFg^qWwCEx2UgXa)tPFa8C!vp_DDDKh^8fz;a&|5eSrhy@J{ zh(Cle(EMTD94kz}>&x)KK&OSO#o95mU$fixg=9cz4#AiAM{V8ZkIojOj3TdQXCJ+) zFFW2oP{;fX6mDjr;tHFrl5kKUOgdEN{B~pFMiRVEAMMUbTAssxpW11=T_$?Y`EX zq~@m~sz*UQ7_D&6mA~a+x$KS3(iHjWd)e8=r(&Y>3J=ti%O=z^zHh$>A*0vJe42o< zD$jaCF|LHONiZ56Y)s#GqSojj{ilU_BIzd`L(g`RQ39|cYNJ%oXlezhzc`PTMdEYi z902o_?z6=KIA@BBV2C=*s3F`ijv?8Q zJgRUE2QVt0|9e!lsV+n5kFRziIeR@u{!a6&+)w`!ROlXx1FfBaj#B7Tj7BO`8?Bu| z-k3!D_3xq$c*HSPg@;(6_5!^~lNhfJm2zZY097yE&=~`!-N2$1-+Q~6k+AS^(U}r; zEK*WdQFYA<-Oggwu8Y>2f#G2_J4M5}GEj}$L1P|JKaP|p-us+{W~@x4NztqMca$wJ zx^J#!2k|=Z31{+t?z%a2^dgd!_&`R(0>8a6^j)uC$H`?eTb>32iY>9vd99u zY9VRxLq=7XFkJ+te}TFv;WW55pUdw`&3p1$uzVS~RU;}+lEQzlx;wG30H4Po^-BUz z3bWghOksl)@YFj$Zwbzl?=@_ENgT;QV*?Ma%@iN=X48EI3NfgvQTH2fr5rC2eXk># z*^gYr03^f`316UqGx;p$|NF*qjZ1(9*wPBnsR$w|=7M02kq{#d 
znc%BU)iZ_R-#{xf@ZuPuIf$wyE=pVOzF)^n2fmeDqI_>tf-n@wbh*Um{_5>&EbwkN z3Gf$SG0O?$EB*L+<{;qE?BnC#PZYmu+;2$#j7vyJ2$6p+49$Xu5i8;=DrCHAOpVD% z*Kh;UoTN{kOBXyjo0`<3p1Dgd6$4-%OII6d7S#S|qG0awbpPY}3WlM>$jJXxI43?6 z50YL5;DB)INwzPmpaERn#y9(%aL3`FcveS&2Lolv8GRlGjM_z^u{tU~QFwZ&-L%eM z=C1%$EHHiKs@cnJ8~XQc+hh-{25MGF;04as);41wCGcrX(yFx}W3-s1{>#`5Ns~Ej zu!z5y=@5<3v?=%d(CK>31ur9if1`#0q4&^}OTMgPQd({{BDESEzxRLo@#%0u>!&SY zU{GD7tg{;hs%c}%x9f#-;6_cQOBusG$Up^xP01pe4CC)R6Kro~hgRa<$~ryGqSgCj zh{8_u5QwI{+qq;r2J(4TO>wm!W6!+()l z-Qt$0!(KJyXp&C(^d0z+cULn1>YfCMh5%rm{!Pk;wnjQbl%zeysjq2H=^5DyY1X9Oc{qrN2>%XwdKfji`_033#9 z&?YqqO0lh$Yc9Ex&xMP36d8bkyfIF)TG!I5ppRZ)=DK%dF7D-__eO9n1;7ufU|fFe z#Y=yGyGICwO;~b9Dy64fZWFmExpulEiH1lG^zH2}gptkjhyxa$T&ab1cg^4>Z#cC7 z`+k^IFKxk6Ri*{0@DV}=2h^j)*TmvFUAu?ISTWh~619-ymomxdhRDdmtMEv$W{S`- zJC0DYl5aW!V6>WC>1h6Z2QmRb{urH357wsNBODO$dz|x_DURN*PYLI~e zr&6ZP{@DI7EXt*YAKDhErp5GI@3%+28-V%c_|KcMVBC`w=wsHeL9uwF@n1~nhP2(l z5nO_l(|ft&lME@IC~d3hrJbh=Qh_|50gfX3K?Tm_v19EDkN*w5O3FlsxT z`#Ru*RYvQ`I!{%Qd0y5d4mL%Urh&Ssjp$9iK7;-)f_NmTdmQ$CvnxyH$q~k;uvE28@VEmlQvXuV9)T*evPg(V?`aAkWO;OjH~gEtOXpjWmw0L`oa1~{<= zU85r(_<2ABq_E&rc0<4R=W?wC08oMcXP>X<5Cr^CT(Xy@-u-P6l;-~f%y^k}ZKfGol%u{ z{4v0y&J+Rde!jir@SI4pDRCDp6u{BoQcuW`JxrP(O!Aa`Praia<#bLsL6&0kJ;K@V za}`noy8tI5oTyP)qYazbpc>nojJ$k*K}E1ZIr8b(6;NrKz9cp1;br_jh<7ND&0Hygd2K-Pkspv(%0l(y$ z?dA2xYUn#0$E%m^u25EEvLD^cQSO%zKmZ(T#wBdA1{#Psk(V@bET9BaF+F%0ZEX$o zL^3G#eJa zNt*9AfYe)ECG^PX&Uh`-A>}U(n}Qr?SpO=XZC3SSomccu>PyHsvJ!!fBcia5D(QYS zS?OY;FzO+obtnsVkGb+^@^m$Ts)>M!qqdRl@9B}yZSlZLWzkEB85kHqL_!k1J=+Fa zCgJ3t4GIHC4s`75 z{~tGY(Ku~4W@Epv0HUDyI;9*r>Jgg_ht7}*>`V=6PT(sz;3^U@cqImc-k>00q+`Q` zB`U}_Teo#a##X7I;1ZNe!o?AjhNZ>|qlyXGu#Ef0JQ$?*mW~3-IeRa&%vJfZx-<|R=UclM6HG<-D?xOzTMjx~F=yYV0>KMg~7 zh#6%P7XuH{>p{W?Pd@ZrdnA)O`*~QD_O$(~=yI{P)diK1b1kTTpe(%XfcqDm=^58B zUZ6?RPvN@|WZoZtb4Auvs?B{W^r${YMMca@@2M*o+93E(b;y5d6h>jV)?~|dSBnZY zP)*Zn9p#vW-;roO)xYAy|0uhbcW>U&Qf@H~ol{G7NZADi#%9L8_8XE&66Mjdk^pOA8B-VWhW|}d2uD;Q7 zmk`PjasYdY;yDU3Q^;NzTT*O{M95b<3B>v!*s5(ryFl1DTx>uavDdF{hCTS{8((Pf z7mBUEm@xZa3!e&%GY^=;sO38UhKDwgez+y!%qK4Mxn4#L`ol{gYgVW$&uwAwM1Dr_ z@Yd+?!{xP??w|kWY6aCxGMFow-pok+-kPj0_U+#E>VUePaU$BN_eL;LGmvh6?Bh z4PI{j^V>6IEhss-%+?A3jw>en5t;b+=(GNRj{eVJj2q-vJJPMEC$f8RNJoMXL1dY+ z+0G6!=8*q%6cf`zErQJ_(!PQu*8w~7`Ctl2C(Rxigc3|pK>e#slBRG=ge)ZU|1kEJ zQE_ce*JvO>(BSS)aCZm}!3ogNxHJ}myGtMt++6|$2~OiK!6i5Z4+M7!Zr>*7Jl}oJ z`<{Ep{R5*J{e!V~t*TiyXU)y~ff3TuRZ54(7{iRYF42fS*tDFa2P=P;w5QKh%IwBM zZY9^KWSmt*Z}#?*%HXQKN;&4)OjjUU5e9K29uL#%cRCl<5);C8MA9?3V>~>Zps~+l zk;OR58U9sc4enCQGbog1j45RWS2E{;+E4%)56HW3Z8OKvjt(vuC3&+9{Sp%7jMbw-o-BJkFLyPvlDd3!erB zg$WzJ;DB89j=8@DefEn(;u?w{--^Zz4z3;~@deN3I1fTMa`5N(OKv>zM;Mh1is zQDqS87~~0Jl?V*hEIoU3WpqvcgTB-RH&7AI7+iNcT64ZTGGkmlW?lfM{)80o92tcu zU1;udVswkgfR9TFCPKmO0J+=8jl+US!w~OK;f^8K7|4q zs6N9>(Ig2d&B!3pYx5?2_3Bms;Gl$@9AdG?*TB$F#5c<|HQZ7%GJ|jNcVYudsS;Pd zc3yu|DqLS11d5N5WI!B(SuC7qsTmM#y#wFCrT$^C3>b;-vI%F__~VA&$0 z#3Us~imkpoTB%NEH(Mpjkdwok$qv*{Q+Z>pSV_F241)HaH;|54gYagcr{b!hAb)7h z{tGK}j9VkbXep@@+t8x%q8|g>tQZP-AO9-(`tU&R^CTK(dBYS^(MC$ z2TxKS=IoQJ&2+oK8XDCS)4r&;zL|n9V}Pm$m6#W`s;a8DX&E4+HjVnV}z>X!Zj zkFI7Lu%HgRr!ZT2kE@b^I|jh5+1gF<1}Y#c{?Gs#$Hy1Y3-X0v@>xGSC%?=QRP^YM z?Y2=$8iww<0#W_P)CIi%5 zF|ljJ!xD}Ugzuc z4(qR+u-zNQ7fk!Tin!X!Fv*|zm-Hvt5W;8f7=&vCulM$8SaWC_a5&&zvR3~Vy<$=aMG(MIK*Wbr^Km4xK+uK#yS z%|fEnAYAB#F(o3NR2?2ecqxRk!@p?ZP!=-M>&sG7D8>xN`RnRB^8smWe#t1Q9Tp4+ ziX;;u0?Kq!zJt3n9Ap43SdtdRb%+8OTeEOF(wB;T()vu(rM;PM zYs)f3me6yl>(m4Ex0dSuhV$(WOpilj0 zvA}wXK-83W2mFHjtJhK?!SnmsT$$?rMlsH-2vEmV21T!;7sZL;iDJxwP&uz!u`y6W zkuHXzy7s{CYBC5a3ArLs2{eq6`hzO_z{!>#L;Fr^RcogA+sqDtE 
zN43d7#%0KBL}7+46$jUedxJ;C^5U2~nzfopeD8OvVe4*#MuS*G)kv{m} z<35jQ4*h0pr*LoZ)lKf_@$=TYIYWpiaQ9MZY)4_`@% zWhM!Ko-{IcURVo4*&+?j9^9m)aHYkQ=ayU6<@iAMSg` zUnY+dIN5-UZ6Qz06vK{Peq|?3l=kAPsRa08>OGb<5mb(ipBCvP4J{G?O81 z@sH9V=n~@WxC=h)v8=-~=gQ%mD?#+oliJwCzu@U64?ZxZJ=w&?I80rGd28+vuMItx;rdjQvR zSyI%nYSMi98+#Y2MUE@qJVncZ{5!V4ig%XlKJwMlxBa*ilM_16Q{@;w98g(hJNCfl zpbpxtPuiZN{5h>BX8G0dZsg*ZlJN}?Ray9`z(Vo|yvHrC9qc`#V`=netS3H(8P1mW2JopCCFL#!&cH?1Bs6GFXk2!H;4ZGVOA-QibFw=TFj zT|JdEAAhBtf?=VGHMpa;bA`sM`P1SI{;lVd%mcjF?< z^L^e~$JgETINS6L6;YB<>U zkkb_)_rDvnjAjZ zbPGJZ2vvCL4f8VRZOO+z`@b^Rp~9vXK3hUs$>O3k{3GywcQeRf2)-RCD}-|f36i$H z$C#Nb+WK?Wx0}*EKuE2!N_GCBmMxMmlC3t1MiDjYytiI1lmFJQ>#_ZH@-_Fa>5l29 zozn0q(JwV?4($??cX&7M?$~-&{l{|4t-fhZSgkS}9zI9v2XPo$8nWY3woF~4&_*l&r$OB$E0ZT0NK`GqSTT_sm)wb+K9mVBxE(a3P}_H^{M-o&%Z z-KXB4?A2zouy!KPGCVP*OraAN$Fr-UiR;q8^|^&#Ud=fj4r+zRFl z-4Td}N(41ZEybEO$DP6_DF0Y?hAzq9@#DjjvgJh}LsBnhrpuBr-r?cYthI4FUnmaU zM8M!Gn_t{z5^{j9m75+?E6zvaS+v#PG|v=oZbDO|3mnk&tJIyy$b-C0t;&H-=mxv9 zW*wd)=&MpobTpA?JVcGU7?n1F2H3B7Ce!Q@$jzn8~!SO0!bQuZNS zb5NQV4cmXPRL5nK3w}e8Tp^2xvd?_Xm={=t+v>Oxm=7Jl=*nfE-6aD%KR1tc@G{g^ zUtRo@9X(cgM^YDv?l2vZsExOdWF4=Mdb-s|-OeJ>+mbYRFD%}t7%#H4-eogY+U`9bPHHP*j9ffPUe#}zH* zvx|e=xRnp%mk7OMxymd$Vo%{}8uHn0RcS8R<=l7L#lD@@ah{?KozwbF-S!FdTcf``l)eLZ9;AY$=rwrn5*fg^mmGIF?kZq z#Atb6?eA-+L+T)9kzRhUf5Z2Cyu7QTt~=4oCa- zfGylWEr23rMmwa#d5y{XjhJzBpzF(G&f`}M8wf81q?|kkDH<^|iRX^FCZt}WdrH+JTo3hB z$D(jy;#fDON71>bW)qih=M0rSRD>qlDOcm$WOl58J0FRa$4S^SK1I;xyitw~hrgI{ zEVH|D_d`QyX>%dz*d;>P0+@Uwwc*LrT_$L}th4dHxTLPA%Fqp)hwyKZfaH&HAdk>d zh2}LzWCy<*b@?_Ro5dlWc1W{J2DS6O^bumAN6Vau3#(|iP@CsGVAUI8N+h9#dy>D| zs+_>_y9>-4A8`7!>Gc|7)fcy=$VF0{mhgLK+Jw~oQoPGWq&VLqVUr9jVfHK zH2qV#{}Q6avq4y9Kgr!XX6gK97Y7nrgh5jwlqLjTM=-USoaW;#yVMNq5;UM zbu~JvoTc%pMtacj@NNoT)qY}N?-P(Y?}wWwl-p6QYW^?e8VbEfWK$6TPd0^g(YtEW zx76w}&yj8($|l=3(HHctrXJ%iuB+*h*@9 zyfBlBck6LC`|n$|+KtiJR-=(9kJbCF3;3*0P$Z`WnK=ba7ZT^@?qUjo4%W@P zto02zmCJ)a-z8k%fW7Jvp_gkf<?EH$g-%L|S6yV$3T=B8N` z15S|rP1De_?9W*qy47{w&@;At7FOR? 
zt?1@6{{@$UUfOG*92N;ASt9IxXInFqG`CpYQ?bLuZJ{3YnY|m2LEenoPf^x9OHj^J709Z`ZTXM3Fug7oADtO(og6pN74Ylq*Iw1xV z_nwDm$I3 zN;34b{#Q>Kr7A@KDGNfCsw@+^HTaT5mokkk>w1mMFZ=-rg^1@dq3S}BeSySFGH%;9 z4+>yP{Id3^-)ST{r$#8#Qj$6D^xS3gU;UU-{+h9bxd#lpjEYQXr9`S>2c+!U$#%vu zO9>nQz|U-}FNCaZJ0g|Na--MwN*kgtD>7-Yb*k?T-nCnw?#%7Bkm(4I$_l|F*Z$Pz zq6O@b73N8sG!~Q5VGp5h=+`*qaWP{h`B)mi^piMV1p~FJ7Oz*svp3+fudPeF<5fwj zyPw)xy1K&B;jcdue8GIN6J8JtPl(8iT>0xlKVeEx+RWtN2=rH9+SKDp-!r_)q09P5 zZQD7Xp^s~JgmeyTakdRx_t4L891*N1$>jM?9=7WC9~>#c#+Gj6TGICtKb0MEQ$5sf z#0+7xbr9HjvA3<_K0N6Eu>Qm2VON?OOUK06Y*3kNy=i#!&jFYCumh11=MpZuOim>% zYQPbeftIjgr6hJri{Rk?1fTPgEa94pNIW%LMvAK1-bxTwQeTpN#+k)#tV`sfI6Wy_ z=dv^n?|o@qv_PAdC&tBG4Pt#j;yB4_{5YMXu)3Md&5gnW9f6~+Rr*=SBApqNWv&)# zM!VrLVpG*w_EC&REYc0817Awi-%}&Om*lim1ehCttw*56%;h~KJ-wjtvVp`me;@4> z8SHXsAn1U^1!fppw}d$*wK$6uRNn z5JV`|x+M1`Og@Q(55hj)RtJ^9Vx|z1imwf=FsEYcoI59Ie5wXU=I?<1!RMXlF#xcULDswqg?eV@vgS$=#< zea{~uMjOjxiJw2`zk0B~;B)o<~!--BM#jaF#%d+!A;YOjV{8*aW@)PK`g)>4l{ zs4(&J>+|28A98l@TPSw=e90r}z9vekd=A!@DU4|^MxuxH=mkez1uN7VU<36Yt9~Kg z!bk_ao&^_0E9({T#g56#aKGG~(eaMRT6k(BL7)%`X1O$MSavVcsPs%p)OaAhC1g4q z%QV#f?o-kqLot2?o^RqV3uzl*u1+w&yEt(Q2$wy4Af0|Fxdi6k>w@QsFTbQ>`S}tw=;zs3Gl8 zd%{o99qNQe9dj?~=;9wYWh^FC76Rki{(*5X&Y3-n#%KnyWsp#Ls9UM>#z=uNj*Ht`s{6ZCq6#==|?+az7qV$!D!&D;JL6T&R5Hq-am`6}!(>uq6CuM=K4pzd~r8NlBxNGMam6$YNYO>4S z!C;Zn)^wE^H7QFkZi5Z~rFy2giZR66!yK;Kje-JGPRgOoRYRbe;DKcArt|XMIrTSG zx7jPwQeN9h#f+Crn>FrnrpI;pom99QD;j_iG6(f!TXz&!BP!|)tMozHxX*-VeOh7F zn=^+3&LY$+C{YTtGQhT2X_fAAN8nO&2oVhnOs9pCrVNkx71sdB^JmM121v28?Vb$w zYluP}))%0I&|H0x{c=I|?R@U#dA6Gp;8}ZR@GccJu5n%u=s!xQNbl-oZ{0Si3N~o; ze#0lX4ZV~Zz?-E%jdji5e`cV_v%?|VF zZ>o5G9)7e?*J<$*(QlmoJcwdzeKV7^TD1)Yel^9j$U_;1FLs=)#14^WnHBBl|8+@; z)ns~`tm6bc)g8X!9SkuN{zqh!e=TV;4IXK6%2d*m%1TF$hox?AaU=Ijm@$J)wO0lS zNOZB%`jGA*B$3BuyMbiV-%?c@`%D@lp6HJL>~9R^Id)d8Sssvx9e}hL(-fGj>P$y> zNH$XYqQ&>wRMM`OyvH>7;V@|+iBqWIVD>Thh1b09zb)ehZ#F1AjhDCeUhYLLS)`!| z|F%N~X<6Sx!YTp~IEmKZALt62ZkR>XC7B2c#g=bMKO_y8zr|nK>>K`A=pr50+HXz5 z4$%Pb-L+qXLibjYfhbzK$&AH%D@2g#nNr{4!F6M+^-nPmMpwF<-5xS7Y zR7yed9ZqIBBX4!0bD8n0a`>O{=Hf9bH9aaNZ#c0F?xLh0kONji`RAt}F8rfUoz}h$ zZ!!zyE0x~)C=@Jkn(=}LwV=cE@6_%pgJQVh?b=OkjEv-;h3@?01&}2!{cy_iZz2IY z365QdbN6*LWnuE;e9{3ANEV2@@z7u)LrR)A54)HCTQt(ra5{Q0xeni?%%c{D0(P;@ zeWNx}Q~$T$&-MnS zxxEjgGJRPYhXQLlldHR^5Dw{M=^cj=UR?8#GZz?vYw&U6 zYyY*k^H*2B@SE3PeK$xMG>89xQ8UV^7^Y&+EeETe^Bxztn#n6Agut44>9!{=#77zy ztHg*h!w0<*H73oAvAhleeO81rRrz!n>p+iQ_SXh)hQqq-R==ZJ9kswAJ)Lzx&oT0A zCVX;PYoADY{k$g*idsW+0h)NFyQA6YLR}VuQj(doBj%}nFd{G=ajmDAl0yYc+^Zvoe(u`Up>9Bs-@rlX4O z#=li;35^a&&Hd+QdgJi72k1EiNO5==bgkeV-Iw%h7E$(^fRrfm#%LobETQLQ$1H(P z;-GtU=^fRwrzJD=$KX*=Zfw3tj}Iy3NKOAuSJ7I*IvTemLbrM2+8`xvZ~3IkVAtsp zf3)3Hk}?(sWfDKPQM||4N|lSvfLqBGal<%*0f3z8I!#CoDNuX3R~f-u?}fjg}> z9^%&Xz(M^Tdd0LT5L=Kf{0CI;)!b+HVCr}sdUyEP{;K#`ywVP@px}wuDSxedlfL(3 z@zQg4bKK*d2~v7h^@Xf0ocr_*tB+1Sd%_Mu;`+tKb9|6_tN{ksoUYD_@c70<0MSjv z25j?D7`-N`m*hy#*UlTi4c6 zuOyMVepA}VoDtMIh+xO*X#Xx$9hPiwhS{Od+htgTaM=BnOZyKzUIk(H?&#SERW)DG zB5?emCbrcoaCva`DhDjFgb8$7DSY8epZs2O8Q<&#U@x;KnQ0Pv6#A%WLXTlahytZZ z=yt&ftI!!nh1npzFOk>0t2(KSW{v3v<9_}A_QHmByD44+q=1l{er~19OLs~8T7-o| z(E6pq!pL)z41X+sz)P#}y)JzTwa}lDCv#aZZEgLc0-B4g1a_{rU zJO0ss?)Y(fC9hSp+W6j~o?yHs#u)021$Fsm&336bEVIbqDF;{v~R zN{RfpdN-VrZ?|Eh@zs0ig)%}zqFbnn{)zj~7qC|6Jha}gL6Y5Nau#JwC$WW*(sk+6 zFdyV6$;7J>$Yt%k2@`^Z*bcV%YHfF3X?@Vfj!kLUUGK)LYZJnt2#NF^I+=OxCtET6 zEv&}sC}~gC`-_7m{ghm){L>S1)Y2^1m@(c!iCl~y>KbTUdDJZ_pw=%+Fn+gjtdM5e zzN%4u^Z)>-U>Z*@YN=%&nC|D`aVmRIT?yE@qIcP>X8>;DlM_`V(n-swwC< 
z(Jn|(RX}*5UvW#qKTLNgXR)wNZ^+zjTT*x6{cUVwwIQY{`=RucX7cr1Cnz;_Bj@v;+0+ad`N#eE!R?v`4)U+1dP1^q7|>0;S%vKZxWq{ZyFbL+c+&5)njSfhh24UKtlD%(Af{!#8@+qgF#e}lt? z8YEqVu4HXo_uRCr0d>!!4rx1bK_eX{#*f~Ul%xbR%Do{_B$`LQTjtoVhDR!CmCKz_ zK{~~M^#}Y8r#DHK*+3YQ+Vb_I!P|0UDIWBHu#a^ahrg;8HtxHFy>x^XkIjOp9wFx~ z8P?JB)yoJ2Sk68xl8@rJ@Rv>K!gTHZEc7O>u=NEU2x{>YAXp*a;F1g|Cq5wIpA>E7 zdS-)ZUwub`B`RHRHCkQr=fTSmE_6C}bnbe`3Zie(6t1c*kCm`J(rdS=98zY7HM)cg z6G=3R@E4 zzLNpz*HbPbbHt2R#=7V)p{khauMkDA1J23i?34A-Q}xG_1uaA(#7yB=3O|AI6uu;k zzd9?)XD-N4xK~fT@YJ8v=@cXXl}T2Qs>`7~K;az*ff+w&iZU5_M=8Qaw{74nX^Pe}ED}|Zy!XsYN1|FY z?nAbWkM~%0n}_P=VE35;@mBi>D=q|^hH=TEwK&OV=b+T?yKq72W`wb{)G7RF0;Gnb zNFMryn%W|p9bzYB3GyAOH1nqU0#M(jafzjrE|8F9({pnoB-CkPjxg&5ga#^suPE)( zgtE#aDfjv5ujS4tPeuY_Y^_k$T#U+u3yM-`D&m@6wq8(o+fG7I}Gvk6zxS|9p8}KVdQ2zyBJus(XsA zttn{+nUe*9Eb?f)Ub#F}ab4`D+lSh*Yq;4_P>J zlwxwmve88}jNuH;DFX>-(EVUl@Eis>`Zt0Mb|z$E_~3tgGv#!?UK%R!K z?kuxpLQsbhf($eZ^@d(|a?(_a)dSZuHSEIm%Hhk&jEg%k0}-j_eZPR5x#K6~CyMN@ zwy3{x6a(JN5RsVnwB_qr3y03>8dRYoUUI|iOhu*W^)V%H937Bz1sL3rHfZ9%L4xnD z&`DzUZ0wGq*KOpw{wR6#ia%!rk@_ZTvyZGTI^XH|QzB!NR{wUj9w{;o|CK4}h!K1E zFGlPxAU%T@bZ}1l)hRm#vQXXPw+Tvj%ep`Cu>;Tj2VuveZ!4fq;jN0IPH~%Hg^+wq zF2?3px+8|@9*Ksb?RejYkRK$l0FolB6wl^B%nKvnrC1-K7_5u8r9-IU{yiX)AEI^X&Flyh^%*V`evRYK1CRiXuR9x>5B{MU=h~%$tT(WL*iN(xPsHH)n=3K<3 zU;D+84>LvQvB(q7Q@L^tzjl|PG|nrUWwI6>c-nXh4Bc+qo0dX%!ia=}@m6B8$5=Ru zP>f+e0PhnQs8>}WAjsARcUmioABq=ktYMRod(nTWQW2$U>^Ikcs>mZ`%@kvO3@VOa zDi0B>lOTJ^aWB`qjH3fwDA{71ks=NuWqD;d|4{a?HQ(=$Ag46Kr$~3I!COulB+qD*O;@U(b7=?(00^{r`iEAW`;AJmtCI%YiIhSZx>=C|As>| zNdc&+OY38*c4{yTHE{P=Q?1;lvAwl%)PezMf6=;w&P{5~{PWFN=r=ea3a{30$v1sqZL-0$=`*eMc3B@RZw?Ys*i ziC=)JU7Zm(m2{!oNT9NiFCLyD9uj4Z4ASN;;io@oSnuLUO6lkf7V#wY#hmu%!6+bu zwnMefuT0mVM3VNSENQnOl?XeW* zKR2E0iSxuOwag;Ycqb#014FFFyvv%oPa{qIY>wU22e4IK!#3z#Zi1sB(z2Ki<0y=w z=6etb+U6(hIVb*Q}Fq|BdS)XfG0_gRzkqVP#vsCsS1Gk)4G$MGy z#?9ENAQ5<_-L;H8aM6mc8n^yicK!h%&+26pzjXfZG+A7WlRR+wD+N;6(oFIeBwls9 z1mwvL)u zRdKx7dRrx`f*Y|FYtlrm7w703n!blVL6}H2+6g5iC`K=FY>Egw&BgqAN2L}Jcz|9F z+K3k2&<;OkKSzxP)Stil4M8k%9r#!(}+C z{0{5wqh$&Oz>6pYXpXDy_A*NN<#%4nb?4lQ3DO!1L>FJ$7ILv%ja!xYVO8|z9%ooc z(3)?)=mgz`t46->)A+i<6l1Moz<|RwUsEaV>CbpIG14kr&x&p6$uK|Ge+krq$e;Po z?P0;W`r@p@-aFnfwf-E=g6eFF2sB-+Fnh~Onz#>lyfUz07M&!EX#)RjBb5W z#7b3#N+#7iXo|*A#ISkt*ZaOu77*HIh4#@O5(?s2gSBK66>C#|>F@U;k!NPq zhh(=t056!O)9Jep{5df#Fsu7O6nj}S)1Q;|@~j9!>3eHU!go}c87WbI5iH0B)lhFw z%$;$?*k0HoD3I(aisPBUd<4=AD5W^L>&pk8`14<0#Q*u(Ujx^M&PYmaBGM~pQkKwN z{Iq!QqmS;tVLk-R9sQ92ktQ<0^GEi-ARRoDi9dTCUVxoFbjF9v?>m;CQkMhtkPKIo zs$vr71@lmmca*k+LZ1>;h31*upz0v2uk$NG{`)&f6?_ zrMKR35BYO)Wu?ti*Up#{N?~5Wr|EEbMM#6AL;Xx%7)K{oWKt-b>Jqmer!v+BIiyH^ zq;QIe4Suj6waH8q()x;sIg9A(Q6nTxlo^{0`1s;&u&i<}%cCzgion{x6i&UBu13XG zLilk4WQPzfTSGS&K-!4jTFi0d1o>Mae;1r1X@2doG#{oou#hQ^KbC?jF<;B?Yd|ya zPcMP}MX*>@>k;|&K7|^&I*^eXo~?-pz=sM@lD6bU6~W|fe*NZTcoe@GxJU&!z$I=m zl<7K&ztwoK(^%m684d6L^IJKJ{VV!NZZoyYs|R0Su7kcTWmKmzn}HySeI^i3=Y>hK zGVt%DmBq@k{wTs`P7S~|(ky~o0GYol^2nZWEl|X|Ac1@tJ5q9iK-Y#q=3<@Nb+oTP zC;cbwpu?@e?uX2{!i{+D(aLh;fj0+}>nMeeAN<^K-^#&fLuD5)oP~Wb_C0Cnzc&5h zxmP&W0&)FcPW*lUf^6m~aiNJoaH4TvTW7jxjp6l7Y#VG}{pg<%q z03KLv^)JwWVsRaR+meH*1%P}IqWIXwaqBq-DaH^yW`zdN47rbc@4Oykj@Czz zAeP)U^@wMrS?Sokk3pZyyR+RUTnir+$?IOLwQvhU1~9V|1Rms~7T5^skP1>lmG!$^ zOb|U$@oNSHMB%(kepCmP{=cjPxWTFdgV*N{&g1|f1&;G=(%NY7EdMgtviuk^`^@E1k?#$o6HoL zqL??kVcK@4W+IqIc@WgT+;G!~XNy`>0bR2Flug-Tm3TYbyeCgV0rcLUu6vp!A@a&pIGS4 z4p~k?zKiBjla1)#nr#27EKz=~(8vJujfKd=&-P}xhVpwNG=S#G%)H>bb%2dQE$lU) zlYEyn{0V`?#WG;;SmV!KG0;Ozm&gq?S2|*h%oGQ03xVonDcglprIo;`aZ=j;C-9ls zf_a3ceVyE-{smxstPj=C4_BM!RAfxvvaVSWZYyRy|oRY+$6UC!{oG9-4-nq5k 
z1*Lq2&w;n#Qu${yxSOMBujtp7!xH}HDeY)3&epGX~ckeaufU<<3S55JsqUxP>bS7UnFDb=xe>n_uxwm^tcCk zN^fsc2OHX_1Gl!bdjGu@j9nq9JFIhH+YX3Gd6-K!{mIip|Ie2h8^??8&-&*!Ve*Aa z7T-4o%{|J}-rF+Wu+}T>pPA<%IrT*skxQm~@9dAJ& zC~bDbhWhLG`=swyTC)RV)}LO24L6kD5us0gSknDJ!a7H;CWHX%WA2cASRyk5(q;3- zO%POG1`%#_aWplbVg?L&fk3wtFdhrgS!EVAbVYhKePNYB$au@|Ec|G> zd>_OfN(jCGJHFGObLy#7uvwWBga(d$Bb#i1EXiu3Yu&-Gxgbe69cnw@ab+mVE~ay( zp}Lg<3#sznUnP^D+0H@y@FD|NErAWW#724PB-yGgTsRM(I4H4u<14Kss?G$q-G4sJ zBwHrDW%kdI4m|i~7Dori^r)IO+;M!QH(7Og;qc;?C9nQ7kIznP*xqnSW0;Nx{4ZY!LXh%BIV&Y`#u)&ZW4~NnMP@qt|tmPc$Y9<6tZ{1@@m(Q~x=ZO5YHjc>1`{^%zDLCy{;8~;pd$>p4>SI+iT z36N}449xb4M^3PCqywtedd|#x0CVw` zts2H72t@|7fXI|MY?|AUj!h9y--C>yi)_zj^LlJ7+G$A|I0m3NXpvJ>i;OQrn~l%l z2MBPwpzgFSA0fKyQ*3+snV}PyVx-ACeAKA%et~F>pE3c~``^7k5Ev`ra-_uM=Rd?Q zI~5>qPrisOyqOSuH{Lx0M^8XIf}wEWI^W#YBb4VJcOooBY*gI9oGUUw+9?|bZF);h z>jbQ6EEJLP1wa+uy%8O<7`udsgn9o0r;^XuHz6=07DBdb4EjLMD2al;oV6dm`qSj! z@Y6a0&slR)bD_?h2OAP)M!upKdW~PaIX*9~TdXa72693lU#seF)#PdR+_`Z_jN$cjmo`+EBOH#pS>S0wL-!;7O$d=robSW;Ua%YC|7@_fP}G(+Wqu-$W

kQhSt~ z;obQ?e~Cm^CJ*>Eoj+%3h0fW82|WDzGLd30AJ`lO4~FhFJO&>t~FLx;3 zr~?mNq&|yM|^zA@n}=J zKWF2#HeCCt&(I7?oWGMYg{-;)R6mN#UnlQMIBea6QR=}G(;1k}=RNxyo~n4TsO^F# z<)k(XB)G3b-(Mcc1k*)mM#q3pc>8q7wVUTR;Qr=+Lh&n28J#r3A3led72vhr$;#tJ zU{MdI16ZV~NyS{_&|jks_k-~zLKExxdvF6H0~jEsiJq?#l^P){gA?!1Ye2T#2shr4 z_POR}M9$cz3Vu{^&+i?=q1Vb~xHw=?b<@8<4SeIEHYvcnShF8!*xctqh)@1kGoms+3He0 z(49q6)DYxL{TN&&i|c{al&zy=hBEbbUxn2jol>*V-VlYDL|YeRUhvgm0rW%s=N{PN zk8o$$w_Px)eRT70npL4ah0U`wm5gA7g?%=19z+kx@G!g^tk}W*i?`jBK!x?MPa>!M ze_)RQWUpj~SpwlHI_naXci6EB09Y5b|Gs=x7zYkmh^ht89ks~Ad`0pi2zwa?QnMaC z`+(>S0QD$n{91>x*={I4BR1;YeszHX*#BVdt)r??+iy_=6hyj2xM1Nl!oWKDbZ>!25K65cG%9dS=lr3xzEo9sDS) zqw@hd_-BB`n?)yJreMT)2i&h#60~gWOMHGXc6Qy1WbQ}$L7&?NG@i!8Ig(7kS7f3~ zK|k;JLIOG0h$?N+t$Kk{$4}*KF+^Vu^(&VTW_bbT+J{322`GQJB@Lhc ztu6WgSXSsUfJDG65Ri9YA2^NNTT(xwYRtY5myBF0bw7Y{_`Qn@TqaiwkWC5Q`yo&R z%WF(na}c-=fvbHt2x@q};-(Go6;;+}Szb%mS9`6A!@1I06mY&MqBO~1YD9{iI~*sv z;HS=VSjz&@>DFXYM~NTfzgLh|1S z3*-OxV4b=G18&>aUR&nk-!=$SeqSaP}GZ{I=}ke158jI_;5E0HiO6l zV5F7YMMjZn`7mV3>@Wscfbrj+XFKlBPK}Nwd0MFk(QTn%cE_0vWB?Y-kue!P28_yZ zfsTj##SzIiyvuj`E}?lqKr#R&q0VI5zIrSX&D{3uL2*Ja6gjEDbyGXCEcW>Ml$r@OAZqnYP_ z+Kd{a*T58L;HInJulQk9#6Axa7BHnI^4*qN{OaNEw)+wmlA(C{Y%4x_c8R8ZHh$jx_*poUV+>6N9jH%?k#P$7@`4y~GhJehp;WhJ-YY zN6`nhegxB+BBrM4sW1>KsEzY|=L(+k=mv#a#bpsb2YiLX+I`h-61?}Om@8K8^gU6)|5P(hIq`Xzcx{+yh)xmMsFtsouyH?-n+$H0yL$L99$ zAvU0hy!SBx?U+!7vIe$o<6{zm<4zWQZ&k@L%N{d}~1N8J5Jqwwm*bk6bk=GFr>C zS>67Y#gRd2|1_O9!+(4L0g(NBn54;i4jk}6q9zBYJ?bhD>ah<1+7oY#Kna#RU=~eD}3$jCai6rvu~3f90Q8*LD&R zID1Z-z%*<;`pdi)OW=pQyAtc>gU<2UCv$tDQ71H>R-Wu1czG)Lw|IYA;SYsBAt#ui z=Wri2py>BCE_&I^D=@eNEda+4G=j=|JL~qjE|HJtj2X3TFN_W}abDDSW92nu$2Q08H~U*IQ5(M$ViAqF%3`@5*Wegw!-3k9t&& z8KV_IR|VT@L(To!pUQf7w*C;<@IoGyfGlQu;7^xEBxDJkvw-tn13=nnF30Zy0Px5> zV}d^r{69+=03C<~kiuy9is4eqB)t(`4edwHBpt>o#a1?e`=?~a2O63G?DU^i{AbB5 zzcTpMtHS+Yf>>~zzF@5GarAwf`u)d<^v^jI{>vQee|ru&OIzUu1_R8rQ6wXqS^|r z$bjD<0z3)r>kfVq=`d3rrUt}}yhwq22xFm##Y5rMf5(@f$>*%c-67>vwOcY?w-$(0v);mX*MH8@ROFvS^zQ%g5Lrit-D77=$^U$k!7Xqf z|FbReEB=ozyq*)xiqAQ$f6;|BuF!CLnq{ugsAyWSaPU9rp66{e?mR6~Dit#3Lqrn} z!ujinL;v#fZT<^*`rofT{e7q3W%R$w;j+j72mAkPoF`y;u0=n@O1D)ca(lrI}$$gf-DSZ~9o>H&KiYwGrewIjOy^7zhr*If} zOR#()BeVWs9Z;o2xqsj>uA;wY^8cxh(+v`sC}C9*L@1ueSo!tAAjOb$53FQGTU!F1)Nb0$$wQ7o21~U1xOCk5^bFu0- zLYJ&{bQ%O8zyxn}G;gbFlofb*o>zEx^3+OARok--Qf#z$x`!)tL~fDWiGCfgq;fU$ zBwIX_o%q+rEeGuLf*!zVoIjr_cM7w*u`1ra+o0jBvaZCNZq*|rYw7V=`Aj}6w5Pz9 zrttKxxakq>iUgIEj_)^7GWm#0rBAY^)N_j}y;uWlhLh}iATc{U`RmbxKLf*Myp7K) z66n$>wb-m#kcz<5?MgKUpRR)`n64R+r7lS2cKJkrrR<6^idS^Il5neMQsYK-5G zyB!OzT67}ZcBKBg<4`^gtSMp3eHI@TUs%1F^yB+=$I-8z(?QdZU_{G4{5pH6o1)UP z#o1b$#^W^>gIoBh^?I2t!R%TR$(H&JxeuL|)wb;oMPIJn5Q!K!Vn0X)4!?dC&He_{ zpAKKXaVCJG1p_z?rzYaxOZt4ZWr*%;1Y2W3Y8Zp$L?t+W?;T47A_+{Swh!8X-&-zN z$o&<=8e{;okoABC&IRl{j{S}j=STZTe|n06?F?)zdl3YdZx`N|Q4KTfWzi8dpMu6O zk+kTaB^J|OAtlB=i!bbXZ>-n7B>c4G8j3`Q!C7PsqAUMSITHMe+M}h`k#rxrXu8a( zSg+Q;Xkp9SIN_i>EYmmDv|{SIrZ%gzY5G(4=Np;U8gb-Y zi<`C@<|wKyI~m$pxn#=qyV~yJb!KXxTCn;^45~L+aTci7mR51?ID$whL*!GO<^>84 zQ9-rSX6`BGKVU2RZ*B&_m(C+EntuIbisDKF)Gm}V^`8{vP>sgQ^z@LXydMO%MJoOm`m29=T&65%X*gDtW|{zvEnO7fsjN6%=2gDP%d zmf>peZvG@cS{ED4XCgK^H@BjRZxeBoDHLy1m0Q+-y#H%xB#81 zWKUI-K&V=N%zDE5eXlS3C+0Pj{1fxix~E_vRE)b*ltxooqWyuw^}#BmhKq*weK>dL zC#E=e!)Dp?FNkiNE%)uHHS(dBoQ?J))2xTz1|OalCoLnCZId8srD4VHkpn>vL!4jY zMXIX|%%reD{{1<)pdxY~HOJmiaotWi2h4`EBbA^FrDP zfX!BZ7!+*`9|e9#VZNzZCl||^nCaebbuZ((lBaF^)oMb*NFBnnWHf?G&*6!v_cStc zYVp^0;BcTtw#n3lLW2a|TzgVW=``OHy_qN&5(8e@PQOlYN##DWQZX9IsyK!qmF8J;CGmRLBiDF>qmEaNHe|#_k_< zr-2lG0{_FO>1C}&1lBR-NSOaSG9yW$@vygSy}Z$I5-?o@!xLlomceyh1-|x}#U8DEc(6;aOQrVVQ7CqP#J7uH 
z(pEilS12VdZ+Ga{zVl+PjzlQpvXvAQZJUrdr8~&W4wy2ZC^cZdrC8Jfk&Zg5frFOP@J3Gh?~%d#k$iX)4X-)1uSx@e~HEk5f~Q|IQAwT4m(v-(aXV}s#RlvhxBwMqg4>ZiYK`lwyM!zmsLSJ`}3#j z0!?s)SFho-QL&_Q3^KgOkIX%%1kIP_-cXAVRHr6P6DRpzV#czzEFHYH&r}ooh0Oh$2XFLlf6`Z{`Iq zo?+7F$2*0AZM5zzmE2-8o3Dts7BkwB`Lj)GpEz-h#qYm7I8|S)V20n`DzT_h45v0L zzk&s43ibtc0@%Z1VF9#?;pk`oftisw<$d3lt;HlT#mbgWc|d&HegY5@DCu;=ki zR^p3jU0X`#yIt;6eya-)te@yVLj?3C<&!_e%VbpItzX;wMTtAB&<{OAEIJ+wWHZ9Q z)8VX#mkVYlF@-*nW10Lh8oiQ-Q3`#bTRI;YooD8RQ#cqSOw`*F2ey_8W49jdl1vWDo95TQyC z!iem#lQiciW?hmrcePZzOLi%V^~XUwa&W$?HS0@xX!vo{7CP8ip`+FH`5n4HY)MCTZdbl7sLUjpAS zUV<2g9X~1bah|%?wu*Sc-oOP>x1F=I;KsYneBD!36{>j*;zXC&wG0s+kC>dIgorLJ zUCw7;ySX1ombTdC2#R!026aUjy^DeT{tz-K!Co&yq4lIt*VS;@33~p8!{?ci+nAQi zOLH^9cw^`x&O}D`F3AMn=PRxkt{-_j8B-y~3R6|0`N~ASTfY&@!QT0h-y*?KVOye$ zRuLDiOk%V%Q}smrbX%=dKuO(&{W(5&vc#D|&D~VCkN#FgM`4W&rhh9AGJ~F;vVGyT z{|66^^W{1f2VvjbkC=#ChhPblNn+UCB)g<=wLwtx)K0uU#ih^WHuQ$YmFD8&T*@A3 z6*VIi70 z=^)RDnd^x%Fp@OK^!*?*wAE;}xpyO^?kvk#Z4K0%I$g z2^2KibLu*~bgBq;`47HCK0OGQlOYjOV^ir_@-=~Vn64%qP7+xZ6@C{ISMIBBe>QXL znsD5Hxc}`*{r=xx00B?lz1Ht3c%vBiqE?38?k4;jzAnxzS~uem+Z3yrka&;5w_L4M zKDc!@I3&6L_ZIbZ;7b}A{JB166Cx*~?$KVBCo$06@H8~yt=*F%4KB+AtZmm}qwo;z%eJyn=QazcU@;bO(KPYs6ivd3V z?xPD7h1tv3t``W{!u1xceH%RpI$~%Q!@u@H;Bu?W;BbDvr4CisWJdErNG#EY4GhKb z&CDchYWCVsYO;WR3QHVt`^a`d#7ZR&C*hqGt3{U90QQXKOs`zmE42pN9@T3Go>gW| zydw@CJY+(i!E_q?p<`Z>7tf4}nudcQ+uw~1J9mLZ{GN9hoU*NH%iQ(&|4YJW80p34QSvUget5RGR`ch+w5p={#$K?{<(lo z|CYU|Q7e+VdY2w78!pGuL{~`$Mdr^{#TV+9p+u(~uO7Fq7O8g@c`b~as`^aOU&E^p zWT=j*7pN1KG@dAqrDmg?G|NWK=WFNra(*(xL4+YAmVS(fs{&T^2?G!9b7soB;G6&$ z{Us(0gU`lWY+WVgpXvDc2rSh~zvMFK1)iQaU);#Q-{QH0Logiz3r}1mTuYEIxojg9 z(q>c&sYX)|b|sS{{T}Y0+Sr({kuYA#UaZ4ivy9@lrg9j$Q13!WVx&4Sd_@HhiGjxF z%D(?ki-9yRubpx^2 z{%X`d&T{-o2X3_hxt_WsZ3cVaB^jbLOvS55Jd4+M6H+{ye6d{`A3~6wBUzuA(*wSr_Fv@L6apPT(s3P0HY!P9*ewTXd;3&VJuYv%}m-0+pTwL-8 z(^F=M7zxxDE_mKn#9eVh^xtoEn6!=NH3EGaPl0wwPDf9IXss_{{wCz$H5nX&_lqLP zca;rMnA@)lw{Kpt^&C~c{ob{d5>Q_H_{m4t3n{S^+CH#=xxo;15WkDtWL2cmDn#G} z#e*-_x)#3qousmgr#hFx^==?sJFTPC*iOI8^J7eXRlWH^`fT^of?>9d%%}jp;`ph@ z>9IK+d{g$ot6T5=jkOdJfetB%Or1rCLmH>EO7#SYKhGj`Ftc17qjg7d6uqK#jTW@M z)KU;$yJQ-1{@!Ge^>Kfv%VRu@4Qo-5AO9;d%c$MJg@uQYqQ1Ew`?-o>Ya6TQe9A2z ziDx!ZiqGY|9V``ZmHOp%Uh}c%N5##kzxG$LqCLm8c;@UeQu>sH&f+^`^Tf%k{>JoA z%Qv^A_WO&(qh;#IS}wcaHo2qhx9nL?`;r4vDWg6p7 zm)4j3^YLlI9TQAu3&+KWZk15hZMP z_2>BPkhKuSlEH(U#&VCjm#o!=*E<7ZBqKqbTu3#jAZ;Ntu`(uu9h$G3lc7$|?bO{G#n+l}8h)f~t4 z(PfW_>!-ccyj@!|)}Eb)*xQ!WJ?-Z391&xA;aT}=l*DnA3ctPCtUXK&H}t*^d!Amv;P`15R+%#?gr5Ms)MFGznAlQ@7O9!#=rA z-zZNJ=oC@K+JrU)mh_iSQwc`MBC;X^8w{LcnnmVqIT zEqYCM50REIxqvo`Z}zCieR+j!?lCno+i zWH*QaV}2m^_7IyY7))v9)T+{PBy1CD)6x3a=Sh*Z!^zQ z+nKZ;AqKS)1~D=5`OR8=%BMbJqrjN-FWfF)mUBI2Ma0AgnL~DmGISei>iV`hE3#EK znC!aWc~l)k$$5>;Ggg$Se^)bPkCM@RkJDN)BH}}iYaW5f`QNcI&DWS$0%wO{nGV!FRXVwTmy95$6~3dBPB%FuqtTcx#*$OT>3Fyt%11*J1IxRUE>Kzx z-m>vLtL-6ZDXH>+rcA}F^Hsd_xT0vK__F#zf2{-8+gd!y<5rOg-63jR)CzYbG$dQ; zdWt)mH(#+%I%@T~SG+7?@bIitVQN39LR3K6_Zj8k&{c#4gN59GT(YEdX8&(k6#UGd zW-n%3Z?fg1%j^^lKMLBMTtn*QV$Tvjnc#D0n=E4$jz!+oBY@Y^RL2sj^az0POl>T+ zsLMBYX+=?+!ye{r z4>qr>noV08LsZeJ<7@!4(LSnh;Mkl|LhFC2+V#|vJ`;YZUX@^4Fa8C3MyAAWHBwE|CKC|2M^ zEZXk{Po;?`t63$`t(2>@ayt7NG*uWo1JieOW835Tdz5Bq!3no563j9bLEnEQ>ZPv~ z+j2sP?2N!yvN4ER z2K30e_}KSoh7{X~)^~sPrD){+o4v9PHRcrNBx5(i%8f4744ZkXMZM<{g-yOnft&Ay zwGUrS$)?sWBELjO43Yg-(ss4DyB6iS`fBNPNEois16P&%OpI~Os7dX|(?nZ5al3QT z**cde$fS=^aqvq?4aVpe9?ofclS@Jp2=O4K2qa%7%&ofcVItQ9SID;72DmQQL~l`B ziPpnxT+s-{y~}T9r@EHg&OjUDnK2gyty_3W945_>enGlBUss?VofDzA z^Ny=;hH7&P;rlUaCK03H*O@20G=;2Y!Ld(IX~r=Fg|&}*t+Yv*0yQn>O0Ww@E%QII 
zX=Nw|6h9SoDSzAN0271#LQCmHEMh228TlhTD)FW1c{aR#OqxCEGqr|y{X3eXuZ_-4yJ9Dj z?94QmW~Z1nS{(xWnKKId(|FJorbk*-$g@7=ZcL9AX_UK0_j6~~JJmNw&Uvgik)~g7 zeYU7(f6^?TQ7l!s3HjPy0}9bt?1J=^)vCC_f`|;S%b2H(wj~BwTib$N)Ae4s>7?8$ zGp8T;d^j()RC#Hb5QxJNiqQh_3MLFQ9;Eg!G>RSM4=ueAQr#z3xFCxQCKm zOn%F`15LpJv~B+GncCfZGqv*K{r3u-NFKxdTBAll5GQdty$Z!r)q}MS;iXT_U|^B* zLjULJK8~yPyZsuj`q=u&%5d~x4Td(-GfCzyDL}PyyNF6P2dN3qD_TJ0mAEN`b}Mcm z$FszkW-Cl9q);M+DHMlaFb?r0IS3yQ7Y|`qUa4X=rBpx&Y}@B_!uo=KDez~qfA^Ho zeZG;&%WzqJ^)T=#K|aykmVK>)! zpM9A+Ag#<`CIODw589zy&c<-l=j?af%01nrj6G^Ls{OW>oVZBwm1jQzXE?ta{n#-| zVqr|^2Zy-^v$>M8A?{=;#l}#8`QXbTt@7?}MY{@xg^kAG2DV)N+_J^k6Si=YS;bSc z>lHik$PcSTa6jJ7mcsbyZS(1p5{ZzB)mpVhsZVoVf?zPcKb zreoC4*BZ#Mut?`&MyZ(epBkt%F>_ZKnT3CB+taTlQ5|$pC3_wW5 z!asLgs_AOh*N3V#t@fwS3|5WJ+&5vPl0(wr5@klrCC1y3pu6+gRsQ)F`#my~Qg?Pt z;;?Qx6_KQjL=T%V;inB8^M|{JbUS2|;M9TrtHk7=+L8 z&RyKmd)B+lBc>qp=%_Xvon7ubCx&n<4?V~60PVGtO>F7&+6|FI1K6IWh?>~NosGm_ zkKcSxkL3&z_)cuMH?fCWZ`$04Z`Y&spo5}#ToyxzaO|mMZ5eMX|GANP@>GGk=Fttw z^>-DosRnu0;`_2N4fIV%W~YCG_i$NEcH@?M`aEgl3WEqABig z`r|)=)|a|N_Pzxg%Eu3Y#6!m_q+L`>4V!E8_-l(h%Dp^PQAbIPc(W9=TCZP=RnrHV z;BwhK1Ij4Zaf*$_Q(2Cx9c1*4Bdd~6eS_=qa!I#iloC=Zr&dEAUE*+5y027k1{+=W zXI9_~R@@)PHqYa+31wDqus6d)>{+PqMqKm?3=I1E_s@h9YZpWh4&znsfiyj9;Cyv92uZ z?^7`BpGwrARFr~JIbv(v?0x3S)mc!cm#;R4yAq8t3o*?rCbYLEGQ!8potkqjT5FP> zYd0SW+r$Gkyl!VXY45B*PM!Xs*2uF*ZyIj#=M*iWC%p#^j8D2rX`}!$0M4kud*c5U zRH0=5Lr~p+5L8cH6hO-|n~CyLWCoZ+><$ntAxt>~Wzc7{A(A$e;(59ILO?E5*FeOb zJk^};ijoDd)*$oCM8`u`2e-D7DR>v5fj2#O^S8C;23#9Tn{Tz`SKM!}(p)|^!sMGd zGw{bzY2b(+s)_o}8ZRy)6 zc4SS2k@qp}>U35GLFAXuJ%I&LUUoF+Xqb`|Wf~VlS-&2r(2P8x@3-OdIOGrc()&(e zCEXJ4&j8*1fjpb+a1?A^F8^=%g)%uVxaWMK@g?u#k4zSS zk*^Mlolx;8w$+s7TAZ!7;7>LRrjKg3I^nXI0cD~pDshojwFQ*|$4N`xW|iLuhi0*c z4*2(lH$KIkdfZCrg^0cTH9?7#O_iqNj{<2TT`@8aZEQ5OnDpUjp(j(7tDg;sMW~&qQ5M=p0t^S7^0Xt7 zgg;asn(vQ50r!5r(CzPla`so}Wd8rJ(+t6SENjEw0@C5$HAO?e);s))nnnLP3Sw6n6KF3QjBlus!it{sp zJo1>`4^7G59DSv(b6m4+Cl~P#!PFL!&lo<)3wG?lgnajZ(=R92wWt1aHH=jxSttTJ z&=6XVmlFPR%&pM>cK2!bjqd*if*Ei)K5l3~rmvjw$4{J;P71!fiLeUXDc^6&`p@;agFHNio zuh@*R=aPQ4Tq8EP5n5`uk%ImD{_&jYA@ph4w%Z|slvMS7;gRtdB&YXpkX$f$czeeo zjpwsoJ(DDE^DwwhRjk956lBn|BK?A__ZJn*IdHl&*#boXHRy*S3t91m@ z#xw}84_2vO9aO3myXl~NtqWH{1B7mMk*vW6a}Y!Qv+MKg?U|-0*z)O8(bU$*gtwRu*i`vBLTY42){s+N8=c#zA7^nbK2@KW z@y`xfPea?*3{rQ!sE$o8FtpJ^-msp8MJXK?u&rz5F{U4ly=2LKJE@u%3Fi?oqfWoH zU&_zY9Y)9HBzhv&r<5#1;|q{KQjg0_pU>{ydz-Kl%tSRWC((1crSZ)xq-yTeNAROa^g;ZDwj}1fMdvQwbpd=xf+rG3oUZ(LW zhbgAKvp`5Rj2PKXDJs`vqHk+rAId|Mk~#_pITIFU^yndz-P11J-8Gx7zywC~i2Nk; z?H%7qI5=K-xa_s-kj$Fr?yg)Eleh^~vWCDxY9`Ax8|k@RwoDR2!pV$8hh27Z>e3zOzj9~c;o*rmhClXZbZ$=+V`_UI z#lUKL6&fkCaklOMkYezh7O1={%Xnq7Y8D8Dmm}Tqc>+;6Ug8%RZ~WQVyALayXOOG zretU!W>!4jjh7GL<*Bua5SfO8v13Bc<7VbydBZNhQZuHA$fKZQN93$fwNr=?5fdZH zhfu{MA}xNUa#5Fet^4&(xB`Jkhi9EVx=BxTkTA9^U)K|(f*id(6b{$Z`UP4!$3W4O zxn_hf+@=rk<~)}+e+3$TM1_lz7owMUapmOv(l>=O6+F4RC{Utwkv9;`@oO`nHHx7< z*0eldrv?4`?X+nOZOP{ZXYxnpfnRPOYwerezL-0)w8Zs{z&zKdYof9u&TFsZnDuVO zL5s~GS=_qVr_SZ;wzHh3l;CiJ;}riPUK)d`%%jmU+vwY} zlX^K)Z0M!;d9s|#oi34gI*V?(Hk*4tB0hWaJp0{IPLHnEr>Gb2+s|P-xODmER|3t1 zdfv*GLt(!ilNoiOC%?H2UMg%p>P^Kh^Er->>@v{|wudm?Je4hzD``9vif`ya?}J1` zjYGed^kRQDC~fG>E@bJRFO+5&u_!o17WVtR_|(bPK$DoUZ8ykZDa!ERxO+s!8KO|4 zPYU#h%8H3tLV)!aKlyUYUv9Ykr>Pk-+)y=DPH|I)%EFlx)sd|At{j>MB`t zVTr>!wYeO3A~#trQ*LN`C4|NWNvv!q_BAgoCT#L`@=8fY!Q?8< z6h3>JG;?E&py2Or<4sSYEWmc2u|XsOexY!$cNm4bajrs=!GvECh0D%#AeabS++S+r zDwC)o=)hx1Iy>G_78xeWP%G4|eg)jvN>$rw0fLqc8yoq;Y3ZwBG4gZD#4XA5<*+lT zgnq}gCqM3P{k0-<5N^t4Bgy$DL0vr;5g#M&&)ABueRBx?B2a??^(KSSK62a)vhQDd zM=&L;EZ9OQ5rP@NPCS%fd7+DZX`Rqm$I;eS_S^xJDoUfGaQO>X^8SAbdi);dj$SW0D2JJc#laH 
z^NKX^zJk6aC3@M8x*^Jo_3zgT%R@I^kc`Uvsj5`}aYh$G+2rFQz8&3Gw9dQPyn&ZD z-RUn)qsl+?{4ckTX_%1Owi6ozKAb<~7%&rCyfMnLgj zJxigEclBN>m7b25(*AaNYx#-KCV|gOw1X^NY2!m6%(!{#u|6ljh15ePNZ)}VH-BkG zc73>VQx@}eJd1ikIjOesNz5G^XS;Eb_v2gioPz|ArXDvrjZj>ltm%R)gu|$;jnk>ZM1tE?)9Pf%RPRgYgnIdyt7bl<(@z-HJz?$cuy4S6bxb1L7Czza+`)op z$wqo;w}vemanF9sfYZrFyZzzkYR_c8qRS$YuAgh)L=NjPWW$i+(LByx9(LQMtJ9J< z>(=bAW!lH`OcdlI!}H(G){l*vbdLZy0g-vC6fL4&wNz;=c>mLQ0mw|&8nsxR$oMMT zFFhD>59Tzo?I}k548^=f$t+j*mc{6_vW>UK#Z)IN$K@Tp^2sTx5Q+C}>yK6xX_-7O zSsKI_6i4HWln|RQPgO^!gaH!(iG>>n~sQLX1^`K zafZ^=-hJB{JThBYBozs)8{r(mv_0`bXr4C@gvI5{{;FMbJ!|vu&LCg2ko;AmS=d$g zlsyH{lUl=A==Nd&#qDl>Y$tZ)Y^F>~;ZQ&~wct9ohw}}1f|*KTu|AK~V>-SDXhF_g zN(Cl=Qa5b%|I$SbVG>_+fv#28Mm(vrmAiOhV{A#Rr)7Uhge#cOO>D48Vk ztjN_+!jdGiV`^45{o!Vja%mIQ?efcpR*tF8PgNv7=qqQu2cv2p8D9n4#i}!(V!bU z-NdA^yPS-^R{#NZ%4vz#x$*e(HUqJlv8^%j&VHS6BdKId^)iE@rp!1MuH=L@iZLmQ z&?*w6jnspa-av1b`ESI;)k;8q;;prhP^#DTVY1mX@Y&K0as;ng3RW7c^)n%JTA;lv zR19nSa>An2G5oFlKrf0ZCfOvZO1;v?D?O1>dAn3uqt7mW4MPv1MMOoW2q+AI{y=Rl zp^D~jlI`)swhInRcd~n3WQqC6=g4EIaq#=udx?qFy_U=B{eB?R8xb+oZ3y@<#~Gi2 zizWhv@f0a649vQO2AFOVAqX5Zl(~G#W|d@d-Uh5NKc#flR_TFakig)Y37Y^t2$sDK z6_en7TjlrcseDNe9xqzYXINtOa$eAZK6st$O5(#^!NcGap6w4YPB&r1=m#9+vA8r( zpNh|IjvG-jxc;K@!@KQRT3-o%tBzzaPyow?riK`6Gsmn5Ggh1Uruqo8Jn7=aZzAiSTq%nwe7NmmI_uVuq+8)?>Q`_j6Y7J_(a) zjD^Qp1iS*1vkZzxE02_`=?OX3XhiOsXXw3*{kvHTRNtICo~sm>OtE+LOMJXnnYzG; z_;Uc^pYXp4|L}yyH(HSmWzAYOz&wR2mx^y#H zE;V&z(?46k-=Da!5oRzpz-;f|A{d7@vw{-T&w^@M76)>TDMkVhwIU}5>0T6qTkhDU za|Gye>J3LJ3yRn+!m-lbsf0*LM4Dv+XrGu(s}qtE%iU*b4hVCy1-#+7<{2y)Nq^ zhKxr(DgN9d0*cazT2&%NY{wA>Q0Ki_U$2>39EouDw<9E3$b1AQv?^WXyIXuUmULMorO0h9gZY~d}L@@N`)%1a1h>bc!;)K zZ&5-4zHR6ug(f3&3_3L#aS|5919MG2x})EcDswk)$(g^)vIDsFwYlP*kl#dGjgA*D zAn{3weeSV^Mj}AmIQt$1GG?*nTQ|glUk^ZpH2Sy?=9#p8je~FGEFCW9fP2AcDP6`3 zAMm=TV*qZh^Q17AM^6Pg=$nk!OPUD$dZR}MCcB*Hp`)NEsC2~rh#B*mW31B;tpy^=*{+O*YXEvzN`C4k& znx(LNn6Ffb>9AH3hC39(L74mwIteq^9zEQfpP9-$PcKPOXJ$apOaV&QaB=djQ|j$5 zq=5dH=85==G{e;@q8NnnDKaP98|aly3if8xUlu4y7EcB+gd@nrc)v&J<45CRTh{V< zjNAHu^S@11sSVRN7rCdapFJERiUf4E%D>Xp$-=({9ox1oDr^G7-^isutX*ZUAFo9b ze*Kl(qYDXzo`n$X;du1I`J6xCkHRgnxVjlycGL|_`pn|hPAMTHp+c)wYz1+|J@?J( z*VK1M1HP>1Nw1$Z5(+i#iU-5_1V~)2+O5DJr!EGx7YU0T~tW0TXKy$Ylv}=19Sse(DPi|Bzo^P7Bx2EW@ zNbOBj2}fNjHaB&i1IG7K^#$)9DfA6OEw<(Td0vX)y9%j*L_|w!m^uAb6i4k!o8?bt z6D>DBVk|Wx8C`L35G^*F_Jyl6bB*e#LWOcPS1>$n&18ZE`VW1X5#~#*O0tcvA9|kf zdV0otvf!{yS9$YB5~#H3IhgfD8-6c{d3urR_#?P%|2U*#?T!NDXPsNGTEU0uIve~* zd@iVzP2{^~XVrWa*!WlKV(zh(fKW!O_`xY|43JL{kpMs107up53#nlLtv`#YiR%G6 zN3>T;6d*TfM1YbCf;fi&IWQ(RVmv4Srn_eHk4;-zT1*Ig!N`$5c(EyT6`7FlIX~`G zDnJIIYKjuK4hD*LyqWLoWO;M4;GL_V*nlwv3s zuxYLw3C=Z5Ih`s~Gi!i5jN|!0_wz4X*_WYpBR*5cW#{fchFo4a)NY%cR38QG5+gg8 zh|Q1lVp#9rP}uEKwd0MS;=Soc;#6?W{5Ddf^g`t*Q!rsN(fB?n9HF8h{sbcs34c?} zfcH=>ki&<;V6fh?S#H0bgM(K=dfF}qWH9#iqRK7m)rRXA7QGbrcwxgoleF^U=zxId<^C;ZXYnhFG+a5%nM8VPVgCz>N+p&$8V}#Rh+5~yh(rYoulWHVd2wS|C8}-&p7R9@AioPG zx_&Y(b)J#9e#gcrP!TCv8UER>c75p?&SU-KNRGJ1OgChQWzc8&J7V8Aa`TFkz#b7_ z6H(YzFpFMp7{nGG(JXzsH6j181|tLo4j%q%dkFdgc!85B>07E@*Qyjt$h}iS_JhE| z=x!bukpL*v`PcOot9{u4PWI}j1_(qZhd%ec}6%{hWcGIANO@1EsmRgFmSX%v@lXh^x_wzOl7b)(>y`(Zy-ZuJ$nSKn zAzN*StI%DzV~j?9|7zeZqNvzR{o!icpnm4dZto0cTwnH5-0tVhbC=B@kLZo4lxJ$* zL&=|2=8q@jI+Sfs9b9yk(Uq8}%xelIM5Fcu3}5Xq@b`)F1VToo2)$2Xuivi8sgeN# z@W9?aiQ|-8)d9l;&LdYD8=p~@|MqfwiurZ)RutkERAxr1du$FrGvcMJ?p%Bu3a|)3 zXdSpq-W@uS8l#KBq|r%e>OTp@txismyeCs==mr?fwLlK|Jvcz{u=;}atp~+@571{V zUXpeU&3zvpcP~U%BL)Fe?d$0`wP9qqq)LAXxsg6FVf;!~%*NunZnOOE8*syY?XV&t z>I(7;Dpn%`I@-_A8-o~Wb#;${XhJxzX5{nArB^!zdD|SknJy7`s7J%s-i{CnEP1<= ze#UDz3wZjc1Y@YB8Le% 
zX+#3%^Fo;u3+*jI_I0S3K}k7zE2?2V!*42byOTGYw7A$E-0L8ktbM)GLTV|(*TXC* z-nSgP@_5Chw9|%pI%#-NpG~zo(*g5X&K7nv-p8?2WSb?v5std2Ru=0EJ)U89)OrYk zLGtpf2S&(O7Hmm5=olC|pT@g+d3e4IoC9YEiv(uzDe!W@_Ee>KB~>lD1iCL1IT4D9=9=1Mbo0R~Am`-& zA?z)K^4hj;-31B3-4fhgf)gBqySux)Yalqm-GjTk6C6Tt*WfNegWtj0`+Vn|s$2I~ z@e`|h=Nx_X)?07Sa=l1PX~R(6L{qql_Noq(^D)N7LfU=ipU1Ggh4yWZeoun6toIiu z!sQtpZ<_XXD-s;BheDov#i&cH;|e&4t(HgnIc4H!EMma~j9Cba@T(Z{kKgA(O;x%L zcQM%8F9@?9VqhF^*{N;p%w*My3|>}@+ko;0ht?{Gb#!TR+i(ZCR| z^3L}o1{A`Y;NECT6wtO*j;_2f=@U4de(1J>pA=)}#eT0{76 zzQu)RTYB4R7gMVdT$C?O!?al-J}(bkLO{RDEBCL%=-&&@%Bzij_)n}c0=j^)DzMuV z6P-aYE<#`*SI!yNTg;A14{c8PzxJ?T;-T>E9nuLfW3>i?{HcjR??aVL5aa(j3M}PV zj>Ls2Y3Z3KaM7fpU>qb`FKIP8&U51NbVQBvP0Q$CTU;Z?J>vWpd_rW3)FP zK3R7Au3DjKzEDk^BAr0n#3-w`{w=}{iFA9zrjK&qX_<|U1nc+lcEEs2Yo20FW3#tg zS5&zIj~k0O>lG;;k2?%VMI>K)&x7$gJQwbX-!n!s)KK@b{(EnAdgk8>^~f4Vd^0eE z=LKSpIU+v(0YPF%?T96dfX7Awcv?B&iT(6#e}6J>+WB0r?u}w| zJg@fscQ8l>lImSgGyzjg#9RoiHV}mxYhQ|Xe$lB&^Khkw1z@1^3X>fFUo0Wu!9&Px z38eE~Pk;Kg7aEptzX%t0l2ECQ-!?}R!ayVOHek%It4bc-m<6p))Gw#lh}dgRliiCZ z4lN$z=ZwuFT;!6A^2{QMw4Bh~qz`vZa(hrpI;V7gauo+pNBhR{VX*o|pT~=CwWSMC z(KBKSpf7j(ojAodNUIdoYx>-eJ{zl8O~eRU6=oO7gT{_faA%#ci%=>@u{R-+YzM`r zw-X0e?;_vWW~s&o7wLHFCc_d(_H2(D5+{OBEQa+ti3iBH zmAX7tVcA3Kk@tw_nnh!|hN?&8#PEMln%3>5zMg|zMY~+_SQ1quYg-YiUC7Y}>Yoz- zkw}5#Zs`-VXjFFeV}`QSqxbfnu_naJ)M?x%rK>}&b!YAb z+@1Zk4(>Eo3nAje47M9qr&4gjduvDOt%Te7{p#-xCBpwRyw1VDcEm-fz_hX8ZV#EJ zd=O1*c4~B6$-B_t#bG-&BI$>tO{d+A(vg2HHomrsg5`mXZFkliCbew-&>34qu<4e6 zE)02*ARIk_em?fa-+eFBKa@QAj$bw{5FVQWB!ENhOK+eit@wVc4kW|*?i$z&K!@l~ zM0#4%Bc7+0YiJ{|vqA*e&rmTFYEIXTqVe?^|A8vB4W{(Ct=+z}l3LzH zt8I=1v2!^NTLY?W*m4`pAhEKj4LSk=oGUQICcIrN+Bp;4mL>KZJ!x+qEw6P&TmDr? zKcfGibA{Jmb3^#kZ!neAj~2(#*-{N>GTT}Q%61;qkyNdm{r+)4fZz2M2*9a({1yFGLy^xVL~!N(bumz)0h{}5*+;=wh9aj zfaMD`uFgUuJ^xG6*?HZ6jQ>nA{GQ^=P`U7L^3MWmZ**(lFf8auQ3SF8yDO#R;u~nM zfT47bDt%;@CQY>3zQGW^?Z0P&>L>v{3`G0SCdRj;(X?*Jq%@}xkKXXPGW0mr7h?sq zZueIfua7sN?m&jPIXXD}00D*PG%a4p-{n6rNf0-G@Ww-BLZ*Ie2o=cJE<%gE`g?09 zr2Ugr?;i`-dYfe|Hwzci`5jo>8!VMpRo-%BZv=idO65P7!VsG;cb*<`dlkZO2PrnJ zg?WNLeiIvwH!J;tl6aE@EyPcg37@r%vle%xFsv#%1g>?gaqwPHSrUMX?t9jVJ$Y;Ldn7i5m2kl zg|JFT+zwrl7-#Z2zSl}-R9NE8{MO6E?p48px|CLyBmpjR>&X0q{<-AJ%X755k@#VQ z(FwdrfB3xFJsLZUC(rKJcQ=@QdW#DnvZDj^*1Ulk^zq*fXtk$1l|-KG`okry>0ok6 zq&7@R10FlwmM`hsGW!hl;Y^e+zE(t%s^x~V3h7|jo5UWNLrk0k2D+xi6w*Gzx(SYP zVvbVNy!TmI97Z^(^x2HsilyTQE5S$)GvW>Ev{MIWE``0^ZsQ!QPNa+H!QgGpv>tY9=i@0$0to?}_((WW+; zpUTyu)?kQ3^;#gjM*iA$QsfqKg%t}m9PUrH$(8b=qSmevJVJzH(5IDVmFRe|98jFF;(^UcOHb+b~2E9IBmb1yE zz_%(3P%^Rq+BtS%NfQ6(T|uo5is_FCypyA6vzU7On4BcxIV36NTwoK{EfoqRp^iXk z9iwFW56$D^LaG3`H37gS&Urs-TV0L{?}Xdm8M~?grSs1I0vK?nzYGZOc{qRE^ zN$X6269s2bUH}3m+Gky`({g&prsXI@*zr}w?&R( zztS>@k&_%Auq50#3jQ#2;9_Ky9)hF7_`+@98&phbp70Yt$a`f&TGy8I!%YbGRmi}A zk(NH^sRE~n55Ef(+p`g_4!nt2YQ~zzMSCM9@U_1MY$ZiTmTn53XA9Bh07-1eFgFp`jB&Z}bC-o}*z&R|vA-+(=VWC3J%j z(2vp8CTFF6lfeOSoW#<7 zc;O!senH5p9Om{7DoveuHU-A?KbbL40Xkbyo1yxbaa!N3{unhuCq+{Vu%Dn_mne!SE&tzM_EE4j8335QYpkf>{yrnQpm2ny0ty)gvC1b3Wf>1_F@w8pbs zaN)Ee2IJl<7KW$G=+YU*W54ah^6w>%odC(?rV^v2a)~WIM zeogBy>>SWK4#G3bP>t@AEUhc z;8d_l5rVd@G5HbyDgDN;;r{t`@w?-p!&Y6y<>5T%E0w;U z%G3pYNR-qX#5#s!@+p<{_G;6FWZ3eyX0^R>^sE<0y zY4d2r4Gu$y_-M!eW&NN=vV|%Z;n{{Y9H#L{yySmn0ibmf2%+_FHG_bq55W_E$?AAa z92SI5&B}y)_0RY<=ojNbOF4!oLjtq@m6PXabfNnj&rVX3RZf#84@h5 zbhrG*>+gggxz_fdbCu_`Kkz$cK3#rah(4k|wz$_mOG2v!jF=p5xhJ~wzzGn`yXs-e zfCNUO42lCgDSuS~8yG)!TK&!TXe#<(G~s+~l#B~=HVyDdQBbR-K+K zNS~;w2d_Jp>S(5{!7>9au2O7L!JcQx{JO#*U2ZC!meP_1 zhC5J8=~J0v+uc?aPn@C!w^#F>HNb8Uh;50`u~I8ZnuOD!x{#%~V8BGZfhqIN3KPq@ zRau@VF@CBE5Hdi6{Y>xT{$!I-p{XNN5f8oI8MaHE|_biU$4W 
z4iYYB?}5;2Ngd4ogXVdPGz(=TlFT3BVy~x(K^=I%{k7iQ=waZxC*b++VynXcEDdwy zTNr)WLEGL%3yV;jWL5+TRp7yPjS=g5m(`qIC{kwqcwR@Fo6$I-7VE?L8fC9J@i!*w z0}7uu`h58a%@VCH4*3#ClUb6-NUoQnMVL+}u?7TMFivTIi68JT(u~F~Y=790-$tkV z?I9fzO%aWoXIb>$@rugzF4eKf5SQyV%=7olj}iu>m6ge|CLz0p zFRI73uyogS&;R@(_u-2e@aN3m3t}1BuJeX^^YM$gucif4B(nA=Kh*~DH*kZCX+4gG zA|Swn3G5N<|9PJv{oe{qgzCNs?yKd=C!s%|5@Fik0tI7E%07mEp}$;U$Z(uNI#gZ~ zB<%}O{t(De>=-2h=I5iRzBDUhPB~)8F#G~bfg<7#twyt<6Tz&C6@2;8UZ^Ms1^27{ z*r?`S^o6nKyJ{vXxJe1Q7gBjtRMH?iLH4wJuUy^EV^hQgrm!thcZ{Ufu#XXen*uix z?k%}m8yWU44yvo!qov~DwjPpaY>4VLVx*X9p-5s}Ls_Q(%?mnXDlEpu?8LjzPTPF0!Foxpp`FEp7(S($ zpt7FvAeN5Y*dcojWHbY&^nCg0YY{Kj>*^F4R0q?p0M#0hny}>s>9w?F^=*BNb7mVv zVmq&$V$ez$(EM8o`4R=u{Xs3XGaR$k05g%`DMb)q_2gA;wQ6Z%M5E}$L+3EtKTScj z-+1%3(*QjxD(bKO3R&|+pPgp@>wA`U^XBB%W>#na@&_fA_%=kt)qG{eLYK&D9^(8o ziE&sI^}3Z80!~0rfnt2m0?YiNo+&8>vu1@CR|^)g)p{WtXRTS2<&&aw_SuYIOlcP8 z1~TFAJ%>D%pXYq-x7VER@a;LLuNE|wo`z4KxHq=`O#lA9|9xo;NFde8J$)qwg;g#> z6%_QdGp>oC{dq5YL&9cvBTeiON)Nt2&$Q#6xwLLzV!KIAMb``0Hy`dstKiJ(+tPnx z!TK)Mr-FL@HSLvFPZ5cN_&;}#W6+<6(_M)BnUPP|2dznlc?v|-Er+vVWC(qkc@ooZ zzkc2$H}~$^$+ye*KSNaQ^Y4O4rq?JxsF7GNMk9`JaXLsb4@$WNCKh5szuW_I>FtA` zaT5{u&uPz^QbU{PzC09{=Nq3hxE$~}7<5?KDpJ%)&K-VyhDO%MrSDdf^6qTsvqzDf z2#}q2PUf*;)cp%@IGmTNsz_>Vvsw@9_te6nG7C3rDevCDRdKNw9tJWQgZ+?#Kcaxu924@7A4-mJ#*SuTxY#oS*kD6Jv3 zNZ-Ky;hrQ73tF4Y1z~=Ol)7Wze9oi*i&#q7aP$6~U`;J%EU%dCYENuHztjhr*1C`e z3q9W+5tVqksN>?*a<<1%=}jSn?;dnSa{fZ1>$GAwr`>#r(Gn_$v!9P_eN>j!+rID^ zXL19bZ*V!;(gYs!)yt%he2~TNHDb&|JkGmI22B!RN-5X#$?^VRj93g>jAWjt+vpyN zY5rZ)^g{&%l~!ejVLavV=lfO@Dd0}d!H$L3>bjtzQ7b7u3eokA!rDIm9`iNVK1?%b zF^OLH8+=j2=e8ZNw4TjzPl_DWF<-PS+_=!`0uO1YJs-j9Hw&W?f=tjycrWbmn`Gttwc%vu!}Xz6h~ez=KlI`ycC4Z%16xI-Vd;6eZeh6aOMmz`lWg6#!O(|+^{m|E-@MM7; z5|JObW_%1|!j{ft$YFo0b$c_v1d>h>n*OK23^GsufDuuN#%* z^YFeeF2t2tYF%P(dG;xZRCK1AWMKRj9n()x3)t6jSRn#SuXoY^Y_k;#6V;(4nGE$v zdw34XO1!OQ054#oOSF#W6%Q>+t?gQS7`x39)hQo(L+9U3co7{{6yQ_X5j#+@=D}Kb z4=v6&3x8wgEn7EVf~L!Gwf7Crw$eUFu<9)w?m5*4EQ2W7ZxBy4TfiA8JU5aQNv$;k zL}yamOsQQZhVLXMaXoj?8|q>PnDG|BWoVgnekP_EJxT2ab+whLPD+DHwuKz09~u(u3@_V8#J0-Tqb zaCN$IDRcx%xh!EmoPU}qR_}e7H(e{M3rh|02Y$4@BvVb)=jE=g^+deE3E13Onzd%< za7)Oj!Hh~xiRtGa)vZFBGLBK*ob9j-w5ad|3VKk~OaU|`iBN(#<6++a)xv=gIaS}g z4O||$mMZHB;lp$mXrT6)k*JKVwRnE`xht)vN+Yi2d?=ZfoknebOe)DJG^cxk;-cL9 z8RJX_D3bCW21*wn4=OJ`oo;v89-aOYZGwf$B@KEZ)rBYdQNouB4cBt|e32ovAY5h3 z%ibFPp4I7;m5%elvvVSGyuzgk-J{bE`2vQHv&VFhrqXk!M0r>j-W4?T)oRBx5=Fk8 zjK{}2Mp!uO;r4XDN7NGa=>6AQvFH%&S&3+HPVeS<2l8au>eh`s!m%gOX>>yBp1mZM zCQ@!MTHaihxWC6M*A%V#?G+G`oE(*aiHmQO0UgH<#Y2O+p-6NnoDyV58b2f;ez(>z3DtN;|wq+q2Vyq2^8u2$VUpf(H4L=Q2!ozD^b}s9t7E7(Q6Cz ztz5Dx~n+OVERDSHpiUbOM zZH9v*KRM*VYhq;q`h-S)D!3C$ejh$RWIc})exqGjV)8TTHH5!}^XE2L8oc<7DXd$O zr0w;^9V!<+K7EBMaKH-6TY)B74$dHA+^K_uevJ?73a0IBkmx|8mgYl**MJG`_Ki^d zN0mu?Wnb4FJ9h5kUb(x#DuGu|H`{*H%c%K~e`O*)Y zC@RWdz*Obiwzmyy;RL}cE3tXXUr$~?U7vtL>Yzua^4^C!Fr!~}ArOzJZ|?$F4Xbbo zdasob(MX)wNZXx8(Wh(4;9$yE;rdWdi2j??4cMdG~yYw}Lv znkp2ah`Lqj^-pOmp^vJbd=UgS6AN35&kW91uftTnw=*3C1{mK{q%v7@qDqV+J5&e{ zEwwqYMV+o1DGX}49EFt%S29b(`{8}=V_-rZsE+!yY?(DST4METIjcozK(w0QpT!En&tS0$7tw91KR$3dt~tmgCd}0r_@bNt_u=UAuGwL+UEVYN z-6#quCC!KQ_v|{HW~w8c!5?-<85GOF8j#wsz<%_6(>YHa`&#=kt^i}|;2gC2K`72rx%z}7R>`R@<_t@hs` zz$$o$Hg(>!Tfb+wTg$Mta~B~EVZCD2z|Sbvr@#b?unBZTV`k$M(oGwxrn?+(6jh!=LL&CQ&2 zk0uafS-fxjUbNp|tvzhtbP9y)>b)rNfIzIb8zr9t>1uU2ifGfYM>0pE)^tsuegTv2 z&1Ro4r0g?>(B?*ONNF{j`NTh9)C>J`IGxGu=1g!4*gb{V)X!OkF*RlgmkvcKfJLYH4Q9l0D#w8D!=)vSp)4i6Zx*a$GjWbD;Tin)R{ciJF-EuzcZ6^l&H(i*sO-zjC z`jD72HaF}jk*n;hl^h$dLSt+SW1E1wCL7wRUnlWE=%ZM-Z#HDS;L=Z9;NAfQ77*4( zYe!9d1nEJoZ-|E8CnVID#eem-1i(7K1L-Br=WrtD4ZDpZ@!&ItiX 
zfZc`$t?^Y&`2EXZ(c11;ys)GuhJPDgX#VR_sn6HRx5~p0D=UC?+zQJ z^A8s>`DJ=5Xw_!mAIpXl8IjkXt?R2Z=%J<}V)}iwd2Ap9Y}acF+R|J^0?T8%m(^MO zTldl`4SWSB-OVOQj2)i3-0XU@(>W|pzvd{0#k`6w@RU)4h5;Tqdka}$F+gyFm#*mY zBRkhXwM2U3)5W&v8FBAWeC$OJWcs%+fe4ByU!Y~Q86MxcIFnQ zZiHhh_v`mC`XIIQ#>>o%>hQA5VL)sBZ{Y-j$)@d53G`_Mcw0U2kKW@iKX?1w&p?87nt1z@?M8bxiWtGhaiU|KTP0`(8-IG_%uz}Vc}9H; z1$nk9F)I1uv3r=?FaKNG@JB`Q7~Is6tJ=#2ql9kCv=z260nt}q^#^~?T^l7iW~QPL z9)}c#unzVsfXl1#pilwCCJlw;#USuj-=Ve^UB+OBzwH(O!g{x{{X29X(p!POC|M)R z+x_?P&=b`$QdBof;xIarhWd}65cuJH^NZUMt+QpS8_Sa~GQP8wO1O1diX@d1ZjHg)g{JwI~ z3HPOWq+=F)>&&qq6BwYe89uTT07a1+c)#eKKK4078o-DVdZu6 zu2jbWk_a6F5gi@*j%)KATe1Xb+QOQ=W%*OXqvXbaYRop_^prHw$x0Dp17I}(<6DFv z%~)O%)CfaIO7zj!@z3Ry*-{tSU|~uE2CX@d8Kbxwwi3>Yxol<=xc6Pc{`D z{Rc1^W+Ay6f4oj-OqNgQ z7BC#=oOb+BmheC&jwrL%t^H^37tv698ZZ*bOBp5G%X zU50y*gSyD$b;W3{)2Ln(6(=7S)4xLycbjP9RJatBkjSehd-dUhy4~9+u{FjNxCLvG zG?Bs@ohPe8eIFON-oQ1uuat;By)&OJtguWG2(sit4MEm!GyJe=WK9awI$_ue0+_xV z7K~WR6vSm09eK>g^$U+kN*18QUc^CH8um+Vo_F$=ioEt^?)$$Xju+}D+Z~{0ld@=2 zP;Pg>_z&g$si6C4kS}W`q#9&7JDIu`pPBhf>uvRaLKI;0_|oDxP@+!STzft4eOo+r z@7ly8WBP9v+jj07tnBZL@81V}dXlHv+CKi~>pUvWYZ5fY(HGF-bSyM-0n`{bXk>{x z`JnqJpgQuReL>Lr>vuEuMmC(}VXJF3^H=OD`Ck4PGLZ}F-2E`maI>gT2=wsqkxT+N zGfqsV%#G*pa1zhM53bf@Tq~$tF)l}9G zvr;pkPrRTt&O`qtN`H4juO=CSn{hM8Fm5^Ks#6dsRS3ui1;d&wo{x{EWv z7hC{p%}V_Mk44x03f{F$4pnSgb=87z%RnbdailCpGnfW&(&DjXcKUm%elXZ%0v0R^ z1xFl?gdu*C@8)=^q63NZf??-7rQ*(cXPpm7B;W1TC}cB2fjFAZ9!sU|s#~}1Nv+)o zBKA(gEQHTzo_++051oGT@p8-aK3Jx@_a=@Dzs=FsxvkYjHE-y;up7ejiaYS;Wea_w zcdGon6RlTvq_(XmkV^`;16v~g!G#BHP*&LWOi=VG3JN)QJMU!y4ZgqlY5(r`a;S{u z2bU51!>t$RfO4=s^*)!p4tK$^GEBeBF0X5p4VGgbI~q&NQZ>Z2*?V_= zmCeGS4yWXx=g3ZIFUv>s>efVr2ug$5PUAuAGFw!^*MG-^tf)Z$k#MZrKrE^+&#z@s zdBQ{_hP>yoAYy%BM2@fVac!+r%It9?Kq_`NhZJipJ3?>9j6qF=awv|E`MEY4%b;J3 zkN_pW&GAYrGnGhcbeRL(=U+6w__W;F^5A_;g{|#NYcflA$l5!d5-W246m#D`rz8@< z^Cu%BX71RHJo&^&TS72tPvGazk@Lj+H;HzgnmJ>6+T-{oS#h?f_54{mGN}8uF;5~Q z?oYF7)gGB!Cipyl9gA-6Af6 z$3m<6R%1#{*s-d!{+}Qk%VbKCKH;y?cv11PpZXo0nQ6EFevTohCV#l$M;Oc^Dyikx zN)`Q#>w);R|FvJeP_YT8i-lc%6qyWiesNFdi|e6ViXWppgjE-Z7h?24<}Ls4smZ3p zy6#OktX$~==UcIZwIW7_uERsDN|A)?a4yCRaro6wF*5Q|~mVMS%NB`=@6Y z^0=zh;%{7qZYFMTUEIf-kd6eu&9nY%uj+z6ZBBAM-K<%1{HNOKZ-BdcBlIX!;8Rwq zIt-H-XIXwPweH7vWBye@)O(jYXI=>x-uel%?9t?@Uq^BB6DeDHt}&Ei47pJU@Xd)T z)4AMZE3#fU6DNIJAqs;&(t}U9U+3HLk#dSzE?Q0k_Q%0HX zUAxSVhA6KXnu=+b#!{pMfJdn}0C>xtp3VnoZbi1!ZND6QfpBuq*NLVi0 zI?ln>X1mt<%+)s5RdgedY5c3hEJ#&I074Am)_L*o#aFk)p~9UrWZ94o0;vuGF|GH_qD=?C7MsiKB2@$^G9^u z4$Q8Um6g^w4jYnAO+Q0k!tmN~yY%IJMjDJ2lM`qU_a^OM^(GIPYmS$5V>6Cu#ZPDd z?RVB0rkx14ta8tPhc{vN+u%L(h1>q*S#|LE=~*W$GQZE03%s8qYBEYR?wQX*L4}7& zNoz@$%PQkS?X%hT@rpkj`2aSPxQR#sY72{|$bS3tDKyU9YI_$Cwn5J+eR@K4gXH%H z0`h*5;OvoFxZd4krwgcfCi$?%mBft9pjM{)t}peaM2gxu{JV+5Q}?>>v+M>@yv zm!A$?CPoI(t;c|l>rJO-C*;xmbD$;1!6a_5x_1ZM zdKr~?D~xVjnqBohX{Lc>=!~up(pBLQTeHwpRI}rv;n4j$F`P|oe z2_pK)3^fAoeW(7?N8Q)?1ZYDt~6S&ed|B#h`_tXIX%osJab3-@DT-M zRfl~>;;r9cdsMj&(c-UU_d=BGE^j-tVA_^+(QSViu%bW2RSTt>L_IPuE-k4P9DXLb z_S8hUV~|qAJ47}%w)Qx4P;o6`t#i~ZzjJLMS5S|Wd~yGKj{5$SWxW`Qh?-NiqL|>E zpyY*^H_3>j8gL^Ugx0P!*+6;yAIvv!wca9ez5vLF0HA4qgw3S| zZ1Xkfe@JUkbrq|YGQMo@RWz;jjAe3bNo)CP*!bo(h>*f#a;(_D2}Z)V%qh5)p&F_k zuh!0dbIFcL)b*%eO7|&R?kvsUzbmXT%`1_SQ77|A@#T4!_pfI*nzC@qv!fKXAWD&~ zr}hjtZl)sIEaEqu_Kz=6AR-RO*w}cp)T&a*)wL~{_~%Trm0fYMmX@+*|C<^Y9BEtR zZAl&p*sdt^^i#)5TW;vd_u_(cV0?YvLBNH1e0ZkF589Af%xa<8dgJ{PX!p&;r|GY#{t-He{56d8kA>`h$l zM7g+d6+C16#OR7VBWl=+2v&Tt0ZDtc5{+Zx?>}?=xoc#QF{xGiqmBDMFGu!$lxb{C zhrcRqnlkBdR`0O1*1Q;r&Sw4ar7g@S?W~t?_Mf_iuH%d@(u^jfXH7w^nw1Q64Q+K$ zme(hU748cp>BC_*hVfM`G1A5i6S!_@{XmpUZ}(QiX?K*V=JUYk^B76|2q?pAs 
z-39#*vxNM8xTwrZGgdOD4^iUdqZi!&cyz5-t>IqY9JBUq{GmCqxAeqK1lY&;^dsMF z@lXV!fP*KfJ4^oXgcSWQVye(Ehhk@)*>I_MQ_M)!5(+}^{UV?$h;oR2!jXH~2&a%u zvPXo&MT#BJ=k^<8La6dViyNtaJI%vbEwaYHfzy!`WhAp0)4TdorIpw-jTSO&8Mf9iL_gh64 za(tx!!m)__&nZzXCu;)DyP-5JLZEgjod0GsXhXmTQu;sf`|P=HZmnsZ&dC}XqgG!+ zseFXe01dJEMRNP|3Y0__i|?bmAJXLS^nmhr7>!6+kG1exBc-@+@C=2YzEP}ppHDH%FB#c!zYaL zskD@4Hdza43!b4RDqptqy^oU@@f>zyo-A*ZlJU3RWwOou(?!8M$I&ru+$O|)BB`nG zI&PUyFj$d$Z5GoMLsqHYf5`bsNKjBEdz0_Fd^|K16{niD;*D4Ylk%C`Z`Zui>uny5 z@2?JGFibibNCwbyg1REw86+|2xeJ6u>XulI&~j(44U2|m<6Q=n>$y*ADdy|(37@UC z@^TGro-aQY53kx-rFdRz#(s20@Lc;V-#zzwRTb*;dpgpKF}E}=0PZwV%T_Jhmi}^B zzK3HLJfEvcQ7#@iD2+HACE-buZgyIdf9}h|mEC(_DFt@P=g)zf_75sy`0o)N58!!l z82A94TP4v3pz#=lK358#u|IIA#ixmpc(XA$=6xm^8urG)#-ARr(ZP=x@=m`0>1#H5 z=X8tH0=!yRY6@SreMWR*ZYP;iWJCWuQP}~*@T~&L$W)>fLrdQYA;RJPv@GN+0#g7oN3{FUOaFTp zH+(Y@?F@VHB{2jV3>_doL;mDm$Ww@!b%drG;u<2GAoN6vI)2`dKk@j>T+~clni!fI zb(RvWfWQ16Uj(ng?Myer>*mu!qf0umH|K4;lfy0Vi#bU^jH_Mwa!^A7$tfD+X~bu? z56gU*K2vD1Xe!ZfFi77XtVU`*6^0>z@I^Ta-)y|7)eDF3HgwlRg z#5POK*Lej~uflze;<*q90aPQ-VT68q|`u-GX(ZFrBwnfSw)XZD6QMX1Ok! zDEfL-Uq+5qY86#>ja6Qrm1G9c$qm;K0unVRAW_rexkWlr_@5J>`$hAI>riAzw-t{M zsOhr&x-eV`YwH^>7*QJ+Rj8%sM#>2W3c(5j?WABnwg_IZf z@%wSj1~*r#qcRYJ#V&J^s2dHnKbbZu$L9e_G?Rz1thg^i3qDp!)k1g~O+vN4*PkoV z_49Gmf;%acm{CcRHcJi3KzM|Rv@*isOcxC%WM{1^J~d`oN3;6aqSU)2s(QI^{T{r` zv1urpkO^PwtUWMB0?GmC6USoQ8P!R%a>OSxmFTDsU9J!Cn)R17gp~kry|ZLhsn=l; z-;CQakmPQ+7mYrj||7O}szaJwfnk6Mx>6e~MUj9UCzrE5lj` zew(Sz7cNZlpSH2AL8}Bh-^k+YFDg!VuX5dG8XXs=|czNha>u*#IhDI znjy5x|HvCTY58dw1}SB=p4XP&fWR5IHl-3fq`5;AsbB<=TIQOgSoGkXS9dg*b5vD# z^yp<-MD$b1@fcvYQKCQ#2sfy+l_5do4Kw;QkG{}3d`(C;bXJ9}?fD7m)0O;5fa}Rh z*xoK*SooCUbfe$>L#C*u{Pa5ma7g^TrqH7BWS5#&xrSg-bhilzRD$2*}PZN1T(B?0tI~o zzgMy}_7fb53Vt`0b@hOF=A4u|6rY2XB7$dOO z`@?UoaZerYW)lJH8ZS;nJ&+( z*YBWaviZ9lw=tz+%irRZ`{yN98Gue)VJbZPdbFgL4xCbx0Af;Zf()} zbhDf;taqUtL%vs&F>WfNNW=I1cgu7)*i)i{8k9DY1E{LzO}OM;0k~S>iF+^##Zjoy zmMzpMC<-VqkV@Vx&Wp0#tvHx=Tlc{R4YA|&i0Swk@2z(G{bAWJOJu%8=jyoAk9hw* z9`gQwj)!#)-ND11pVE|cx#d5^kOo6z^Cn81%*4~2%wkzL+gwljZp*s*o61ykmth#`#kAGgvj7<_tM5FSqCh5Ot28_DTIQMRf5fQ&X1Hgi(aSoFDtIC3M zHRl2;51x*$f1eI@4g}<}?#KOp7q9A;Z2@Ci3-YOAh!0_Zjm8^ADJ8nDhdv|YP}c(r z&zFbYQPCIQH;Cz>!lk99=RJjx#|qT%q?~RI4|?6cn?EGIt4U%_2qjcFl&b5ON+zb^ zde8~NT*=_mh%gF;L5;V(@$;Wah2Y|HEzeqHo_7NxRH8$kz|G|_EyJPl zj>rn7Mgg$niBa9jRtwr@yiEh>P+7!V8S6+C>?>P#aaigGuSqUEfg;IsYS)k_j1e~^ zllG&zM+m_TrS}mIO!``prXO)7hI_d$( zsANuKjMmjF@y}%ZW*z*5C*U)7ru5P|+-0oC9apQwN8KE1*rLmVt0nM07i_ZNefbmZ zC!h!%BtQp@;@pIpHpSNSX4)OkKAR%=PtOub<;y_1nLDKfZcgg$&z_g*oU5%ay}Q5C z8}+sfp8hw~I@Pmi+S_v1-Y`=SEWA|M$6Jy;2P^&VR@F6Fo{!TBzZE ztLJ@4Z^b`lExojr*-^HutsGTIZF?BTsdlNe=EN8#sVw9tEpugsJ66pTH8RK7-@Hrc;wDS8t zA*9zwstOfRVXjJs*HcrKb+>WD)imToP9A!k&P&y7Pd+Gb)1s>3qp0JX>qS35JsA85 z*o$r2!ZnEqBIUy$wpc87Au||Wb1bK(;ZVd3x3h~#qZr#y^Y_)TAVx(T8u1j8iD|H! zXK+4S(j+P= z^W4E%tm7a>$0ZHd_(qpQjk&w&$>+Yy^bC-Sa<>ttBbX$;vF3HwZ_~7DYQauojpnbk zwOK-R1P*{;Fb~lgh$gg4+hvIfpyO6}2@C_pWl?HTsg4=A5_v9$NKJP`%jbPMCl_7h z|F_iZAsO`qI`W=afZW|^w#s0E4aD|`8s`mh;jj3!oIjZDjuv(OSfdkpH&>(y-vcUw=4l$qiRq9%sl64*qNuP%J3l_YO!H zp$6DafiP%wlkP7NF`;7qR2ati`FNwq0?=j~_vTs()VhlqA#ku-1;Q$0B#>oDVKwiU z88ADPHaG11-Yf76r>JP?L}o{novYZwUeK0Kvh%&mTPh@K{fOagsX()A$|)`HMo|zBvxJ)rl^PLtZHjd_5`$$wt>Q$<{rL$NnQ+GwFF7-? 
zCUc1u*6b{&?Ta%!n_yDt=k1@=btJhYsNT&HSX(7TQkBWvr_H_A<{6G7)JY^7Pe4#C zJeb?Y^N{s9;C9&T3wChcT8iwM{=E18(gV-#aF-b-!**(uSTR84OgQ(F-=5D%uoC{x9AWEKQXs|X-=hw(y;wa6~Br*7dIl| zs}7!r*DoSvSalhN{S3cqDDcUDatnwC9RLI|_^kpTLq>@tX*Z7|&nUY*jV4qcPfyiP@US#8YW(_EIm3M}-Mdl8=O|9FJHYR*N-M z6jyw1@AW5LCexQz#gl|{BQ5QOldx&sD$4d{o|+{mr!buO?k}!F-3;eiC1|mMv@?Bh z-A!j?Xc)74`QBRr##={z&Jf9UZqu4}eyh8lQ~GeWGpPH=BE<~BXPFpsg(_X|KO#!f zakf%#B~QaafoyTw6UtRd#SB|%0DY#7W;GN%R(&Yv%)*|M9IpusRHh4BqExO^Y9hFN zL!*)Zl#GSpF`ev>;=4CTz~`C?Psg&kv0MJKTF3C09!>|ot|0tgKfGoJAyVxhM%|Ej z(d;j$^D8zM6`j`GV2CoHS8pCmOybOu3_KtPATEYJ&7Ee5>$G`GnpCafeP2X)rB0h6 zxMDK!3g`aMW%RdWhaxpeGw_A1&k%&YTiwIW)EN|>*E5^HyPc60Oe)%5c%^#d0g^dp zRNKI(c!9sDG5pUD_hnV*{V#kX2D({l@Zyp({lLOuuwAS$WIjDWtCusE8bJ3Hwb0=J zW;~w=uSu(+{(E)%oBbv9+YYY`K?RlvBmB)n>Nu1J)Vjn7KGrLJ+g^sqY5(K=yE#!Z ze|uD85i;)vD$r@JYoHd>uO)@3c#um{qn=E~!cZL^spNh%sp3$?E_FS07Yf0+id>7n z@ag(q=-q`DHbcx#j9>FdqBP{H^s|f$c`0q;S?imCsyCW3RG;0+PqyM&CQEK7-F=N9 zT-A!%8O-#n6h&>be|MXNxSFG!EVPK8MDC`~2{&A|kwUewj(6X0x)cEV5C_AcY>pdFK`nIFpB1F{Lu-_VWHi9()Fpu zZ-Z%7%O}QcKw;mXtI+zoz6*Ui)y`&jba=PxRV8#cUY0I_tJ7euVU|ya=U3P|=Iv0> za8*@?+SqW^3FT^iVg7swcG|TFMDDv$TXVc^Z^5k%{nOoc{2y>qw`9b_xR2&)c+d>t zu$Ur%AmCF+9z9jp!x>n|)n=Mlsv)$=jP`dai9zt#n=?vsY%F5N9kM6|bw}U#?~jhT zUb2>`hb|KE9%BE24I~9^c8|k6rx_ZI9^?h@B%{^5;q(9^=%L|nP8a;SWb$AB_K|_{ zTds^L780Q=**hd04ED*XlT!>Rjk(Z8Op`FR7Jqtvy8pEDWGK>*Sui!mojDhLyH0|t zA344n0it>g_)Az2+=kKmg>(q?mYwN{;6SOv@b9baL2Iw};V-qar2B;SLuolsU3-ci z)r;LXcqwfp*mt1eySJqIUn(&=Lp%q`-?>qVXk~@1?`n{))r*c5m}qniuoP>Ne{ck; z{3WT{{IXy6Bi;-eiA^@U-W@?ET=f58?Ja<^-oCcs1A-EQq;yD#l!$b9H%OPXv~;I* zx6*I}(hbrf-3Zc1OE*Z@w{P$~&pY3L=6&AzzW2;Mb7s(UxPQOB_gdGwu4}D5T*Mp? zsgxajA)2EVhO+rV9V{|FxHyi&EK(i4JfVt_77#uRRxLMj!Fsb6)II-4;JB~HB)M>8$3rI+rkiW6Ww>%xh4K-!=89!=6YAicJi0K1jIS? zVZ9$dHAlWi?YnM5H!Rj|nHSeqQ&t`~oL#NI1H0|%i}#rA)^gm~hQY5kQk~wJN%7&- z-5TDONKznG{6gIJ`;IBc?PFe2*M%JsRQwYWV(&-S$lw9$SNK3l@5x86-qLP;UO?&{3aV z1O9k@I56ADQtLNo40GgX9oKx$eKUgOBtkdZnY_Z1uC7FrjF;A>mI|>zB`t=F{r;*# z41jFm-_x7JcY#%&flmy!4n)D>zFi6=k$?AY%y7^0-6b@Av*w#$cR;cAb=+7wsMf4y zzr6WAU{R-I<=!KMrb>wc=)%!=ipfdw1lEn{&v^bsC328$JEU8KEr9jE+X575`Ovbt zrZO$0A|(stjz>cz`LPD~F&V7zL4voEH)Z4yiJV7346f7!KgbJQy-@ZKd^CW>Vv2rI z4Fj(}zl8mwx5n*E?{NO@mz|-dApLS$6*M~S5IcD?Iqh)EQ>_%WO3iT!$a(}Zuqys5B{oA~g`y?Bed(^aXxZx1 zy>4Buh*)@D4UPT-NxVOv8Hg}0d`Av-zhk7(%=X2w3@=P$TMe%5} z$5q$|VvIvkxF<0)Qo;y|l9G~9F)^cMx9>^hqgHQFeOp86E5WVTcO@y|yrEXkEOpc$L z=PMgYcL#|D$GnBBM}0^&Rk)ctB1e+FJK?B2fi-3D4P;=AV`2^f>u^N-9IPeYdF3!K z6XaYx1XeeJJc7MVJn+MFN=ZMXm9B6*g4MpC7w0s-vX&0m-{v^*ur*m{k|b-9Dj;z| z75&-_WI0ODG`CNTR)8}>0*fkm%}|8`HchyKIEZX%}?z0U}&_hhiW#8%FG zY@~`tat6~5vv)M#Rcn45*h%=K5B7z0e~9;eLtan(X@;T=9owHpGALeCJM!4AT4eu^ zaGc8I`|qU>%RhcFBYf!~(K$aTrJy{|%=0A-4+keS z8sFYekX7K%-z;f|JKhy8&gmQoRU^o z*C(Z3o!U-wtKY)4vk+XHZ%#GAbd(7z)z7<>G}CM5kz(3L{vq%t#X+QnS3h<6N8KJ5 z?l>R9#CbwSwI!#xkwxZpF~w57tyC9Gg_kRTbTY`aNq`G$)@AK?UHK8Vkwo?NA%~uP z&Ij*R*8=KyXKS)RAV(QaCJ7_qBTo0cG(9$3xd5Z=!iJx`4lhuIO9v;VF0oFcDHpcj zr7S+r;n+eFi+P5@DTpR3zwCxBnKk-_f``r~G_9+duT_qFsnYq$&;o=;py_rF-{HRu zPXAf;M)79@PQ1S%^@PZKpW5-- zAQOZ|T$s+Ole#B{pbE4Ae;9p7W+E|$;1o@sWPfmo32^cgjY7pV>@^E{g+iKgsuJ4g zdo-*Nb@kIcW^4t<5z%8;iS(}>m>!DDAHQN4QCmJ=yQzPYieWIqM~~(E3@P+Xa z2IG&}QP3H0U=b|tj6Cu0EP#xa{+Q#_${w#>YR%U~ma~PY-!Tox6L!Qr>Z-?L$fj+p zcD;4Z8ezo*V~QOv>uuN(D-3r5aTChW{H7Dy0OAhrIu)0aVzMMRm7?^>SFZz1Wcc>a zcn=T~q8JAx!!Z5;wrE`OQdSQBow0%2ik$EU87mVlfOg*Qz1F97!f={Ua$emjONZA) z;@#-SSDXTIK->72yqt?5E!5nfPeX0LPk?9`aNqLJ>(3Sp2dmpW!85V5~u1z6|lI^;r=~UzTa=SEPqXE;9gGf4!8xL3zlaEbXi3hFR z>yx~NKOH%YcqC?2Ub*r*UMt7#weN+ezvzngKIQQWq?%X-cP$q*W>P3dS_DeUZshNil zNkkx_$QVp+Vc#yQ3}e<(QKcHmC3aqf5wibLqRpiw@L5?PA&l0v^>xF}TcM5j4#Y!_ 
zfG4R1CM!ns2|95@Qm$2S$p`C1x#x{5*Bt%~Cn2hqZ7<~)9yivvOiTn@9uB0weMHaZ zyhlk?E`{}t)|;ej9_wmtzfG5|;Ry!lXglDzf#(;Tx*Yc*90ox3b zt1QRBoyiQ*?c*?3F${x(;2f8`+Z%B?IVV#)l@R_zIj_su7dnzMGAxQW5=b3uo5D@# z@*Le}*Bjw{5>(FP%6rzgecNdWin-E>u6szTH zb>@}y*@dXdWU=($YS~IGYIPGADtSOvQPMa-bM?g9y@alyQQ^b9>QyK81ZD77phir2YbYp-_lX7UJY}asn5%ogoBe0U;Q$H5* zBNc%tBnBr6mwh~&bK+{ybd8JiDs4JVIwUVOFcyI$BD46yY#PHzWqoTaT;e~>d-ar| z@c>rG>&5{K2PYZDe60VyzXNk%|M&quXn3o}-pfGEqo z$ALyQcsk1y_aqcYzfbZH%3M`YNU%UMvPs6Ajfr1}1HP3n9f#a1_pe5mN`gLb;o7)u zm6CBFv5MEXi-bNGAp2cMi-?FAWiDU)1sm@USa;trH5B1|9|Z;0e8*b2cR_I0LYnhR zXrTImge1$WyB@hKT(D5qMILkpjigRfFWosi?ZqY!UiTcV~qZ zk1jZg(j&wGyYC2Y8Uny3>+9FYU|xUE&=Bhy`DU~Ih>QKL_B1UmEs8>t-7nBihfhcm z64kiY8!3E__05{$sl`gDw(z zCAWk6%2jE^t(ZWP@RG?P)I@V;aH89;`Wad}YHe1=%o|}!^A7)#+<6Kz{X!Bf zH2Sh!B4qL`I1X;)8MZ9nYHo!ysC}he<_?WGmV?N{?1Ki${Vr^u>RyfBCt~;ipr6T7 z0sS1^+^%&mPNRcFd~3xaqCO;cMLhNC9`$;|ag+Z5q;r-5dp4zgjUXthNB(-uT4`aI zD0*V@m@!s6t;&Ho+5#wg^xLD)EmK5oE2m(M#i&L+f4j$TsEN2}%9nX43z-DR#E)OG z)6Ho5qg?f;mKWpi=s9FeTwD!pZs>eNrMzM-RO7?g73;iFI3Z*1-($t!oYoc+-9cBe z;FyjgCtY_K2_PvqIjznsEh^*TiJ1B{P{O|r08eU zO7tv+E`kC~U>q@4fdC&k!8}M_>YMw!^E1yZNjSc#d%DczAef<@)||Ry&#;N}vc8 z+~iM|3fZ!b+}7K_@BDR&JFsP2rYeAfrwsf8%9i!g}iKK%3G-i!VGmPg~` z-+o4j=G*&wq+KIKbYPX5BpgrAzH&Y#`L<4aRGb~%o!LLu(vFU^f5Pp)NvZ{}-nIz3 zRoO-nRh5-p!i6DA^g4}Gw!+*JC)CvMP#BB+N7{j>M=Dxe69c}es>)PD3%TE7G#k`- zkCh|{@T^4cOSSFs|97btWDkOa$R(R0Wg}V81sPwoW?GY4mGluZICLEkdQvMqp&T4n$g}3^9bl} zf*=dZbRXsT_|owVh7+(ZhxC`WCJbWGBRSjdDQ!&hASvg-qol5KJ9gxfHe9K3Hxy%C z^^Fg@DTDsLiP6`Ls3zY+MGsFIIw8 z;vt;R>`$HbGNXOf%b%Qe$gIY@8rei3f&UWV9UEW~G`P7uA!f8=Ck!YvJKdS?85pQq zT>+$ZH;C6(raPd5Lxj6%w4XNm{5>}A`TqhqC|Wi5%ny2M+Mx3TfAdX(>XuQx%%~ zA_=1`DgXzHiiZiBPpOomu_EqaL#1AOfM}(;D{k?;fmkFl%Ef>KfEp07FLw?pQ2LrJ z=jTx(a23ptW?7p7nh{0J6&S^WxS>aCKFwAHdZeU);(sv5HI(u5rd z560&sn&ImDx&f$4UYi0aa#DLbV@_ZJZN8{gnRDk`^23BysR7MHpP*pOr(Qu+AR)H? zB8y2o?ibwMMtkHNi>Wu+-FqSq>YlN(wy(P>o%mfN6`ZZ+bg-03&-E{HaY7N9R4GWwcD>W(yV_NAUXMJ^V{X=ZYcCtjh+22GA3GN+t?I;II>`VWt$g{ zx5visiZl(>2`RUo466jLl1aqOYIqYs&YJwZ8=3#da={%&4a-(kq%f435i!-;9hW-r zX*stJ#jaA>1R2>YiE`3?#0IDRuK6an=#&(T#!{2~N#P4TPW!w=+#-&E?b;)k{@&{U zO7)^Zp%}@L!pY9f4cs2p7|=Z9kTbDfDpU7|zUP6|;WWjlRi-$5VQ^sdpkjI%%Bn3? z*i8bjRjDoTFcg4c#~>`AdR#e3DHQ{<6^)DO|L2_Du!HLj1ry$`5(Wy1 zh=>Rpx}cdPO-xMe)5E~?Z+17tU$F0}d=5zEm6XDLpXqirPo@c{Vg#tsDv>ukW+$-)Mhah5b@!JJpz*7YGh=41Y%F9V^ z?2LHc<`3kizQ8q-^t>e73V2v$wssLJn(uDiQanzlWiILdk} zSAvApMJQ?FNDC?o@J~iiv4dz($6G{t@>M*IiZpWa3s{1a$WPDJ0?QFb^f=ALLKTjOFa9Lb2cj1 z|7YAto8lO1_pd9VhZJNH!yR%}0ci2}LL#GXv3-u>6J?L&uBWD z{qh80o>_2b3Giwp{rODQO=$Nt4djsLC%1V3joK9$b;7@*o(h{uQltzM7giLdqR9Cs z6vjk0VO=8fVF^pK=VFh0J=I}!z~$G}UW)q4?NEJJWH|i0q02Ro6a!9@)MZ-L6%G%m z`m*>BOkLV(v3hW0?p;9rGMSK{XoI#J^A8_1NA+w?rN1 z$00i|bVvGO`jme{MeMV8NFQ-H&5%y|^ibP?KRWjVTbK8{I29Dd@WYlMOx5KR(g^?> zt;s);$>iIYTd275Q;x4mqKQag0+^h=D~C7>CA?hTI_{_qfCPybbN~$k+7X?uTaYFUsQB`>fCO3fu9PV-W6ug>0~T_9Ehb;IkB0H2R`ziY?H&Qdv}k^n0`5V}gz8=NFmopMsdF`znQYLA3jO%-L!Hb~RUFTuZ=&igh_OFa5{T#o zR3s!215gOoY(pls>`=bSrO&^wBy|HMi8BP1k3KwAqIGFpM28XbO4{3t3W!FV9Aqy%+jgoYbx zndL7e&1gu3RBn?(*O~LK>Op+}?!cgc|(7=N! z^u_v)K?kTkk6$+3dSL36?Gz*hbP(183u$y*99$~tHk8)uP~+g>puc9Pop)I$BDC#! 
z{4moGf)|nwqM@7O1Y+u*ivRPyA%xjNnO|01$@vqb){B1_R6sblbv^*44H$msvR zmV-s?pO;k>io08m8HK#QQVOpZJWdHM>x2@H^_s`qK7sFmQ-KQ?gU)yh#Aj3ZTEW)nEeFH4xAO9DZxCZ~>5^&9c99_P9r7_lWHG6)W2#P`A z|6ctH`LT!@BlIP{yVXj@zT!6v%z303BUu3B_X?3dsx(%}H@VC77YoS#VS&G%Q-k)B zNb%)L&E`1AZ^00O6q78ff)4i*cloS)(OA*&oeq9 z-5Wy&>c6RdMRH7LdH-GX4|5N9f4|ns_6rOo-EjV*8%~jFCEyc0hOj@k3z=8Ex?R6_ z{uTpS(gIJO0I@}B-?ghW!_|}V7-YOf+$8)160Q!!cFLmsI`Tn5zu$&njjJ7~n`_>s zcpxWnw-fV{_RbMR=~s6oy;~Ku=rm*2_%2^ z?EMfuMrgw44sw+{jx2e9^CehD4!KNcnOQ3P?`77{lz@Oir;++?^4vWbPA>rjfs`U` zL!`W}YQr(yAdHpeyA7uCl+)j$`}>u;P&4@#9A;i72b@_q<&X>Mo7;)4?>BU$k4lzYE5f8IUaW=nisMV8nfE(2RO#Jso9FS1%2us3<7z4d=mjUy@cP2w->4OaVw7Wpb&$nqj2 zo1HGYLo6uOzqnbR#aqNvf??b-6B%If z@(|V+LB1V>E{dQmA~B5o2W`lTlN=|~NbvHi@al_xgsMJX-+m+PZs#zzo+;VVj^WL} zDB_>$mHZ}xd&b_WzAy7cn?AY7QYn@QdGx>O<^2Zj%tHqk#xL@e6;~wyB>G-k=$j#b z?QI9_CuLdJc&k611}AqxR{hiXM6WG~l!%_s-pPbTPD`TC(fv6b1}hZdWZjmZWXu^| zYk3i9!hcu;tmhvMDQTp>r`ZhJCrW0z5m&zoqXwoK@9C$gtAeb1z!sCfk2H8>wNTY- zjXY$Zue-sQQ!rotMKe4nl!h&B(N~C(Cnx6*40GwrW^EeW8oMKw9{()}fULp#w#YAX zthv%84(#g-i>u=YuLHER@=npU%HJ-V;?-I#a_#k4Kkjwz&jh@;w!GKyVmYw$-JcVJ z_(HUVCcb5m5eo?XgPBu)q8(eZ>4Q>ypQ(x_ zfcJ3tA|ALu7U0vLmzobx-no8()O1($|HSYb+EK?{1`r^vcavXn#}25z+En2A$GgX3R^%xQS-|Ia&+$Lya4avZU@t4b*Q^Q4! z5ly48v%j~FNtHsRHhejiKXM2SmYLgtO&8LimqlLm`vCs^TU1}%7;d6i-{lV9HgSrt z4G=5>JfU9|d2l6dX(WJ*usbARz2XT5U^5_jZ*e!_>CI9&&4dC zsNA|O=!VXeH{bFFfrLRqi9tv0C8{M!z!lpdcV0p;m>R9v%1)FU>ek2<@nq}S9~uMi zTqgx|p#9V&w!{GZ&^-Qa`08a>Kv*j;;gEag6LLGRXeI+g&l3|t;VEWA#s z!;3EmS8LtDoOHtgHNF=?{!mKhiV8lrR2>1szn>k(lG2L5slEGwB=b=;L2{AZhP>}T zXrzbs15TETr)ddT4sTnJ?COyjRABp04yiT0k$;Kj{pTdSXh4)Zk654n%w6t}tdW1} z{S-&IWi78|+jIOMhYiVuJZPdetbF{l13~74amAJ1#W&zFamzVL;S4iz%h)MMujv}< z9zijN;laiTeIseuV3a#Za5id^`vqUudJ1o-i$Q9*D7fsDsM z>P_P&!4RL=?ECw`@U8<_-Du=&i@gzpXv=drg zfr<6+SHvs6L;25;A6jXC^(Q{)gX`)#lv)DtutnfWUA;NFH0Ak2X*GxT8la-uoOcJg z|M~@Bj;H7mGfqBFV@Cdx(qU{s3~soF93t=7V?tmMFC~T#mDAiP<%sJS{rh`=w)`*3 z{{D$l1xTOYg&Tle8pwc5nJ|VSt%>fC!vFSV|0>|=z1^kIxN?Rykhj~B!aINYC$q-Y zz4)6D|02oXKkXrRx_d6Vg=F~WNQUp<0fm1F?qbsa5d8nX+pC*Hi4>SW8az$O0I@?% z0XuX}xg+;oWXkwY^8dZoKaS*^CfH^D5pKbi$H0ozAa%eR{aY&i*9CvySMRrAm!FO< zQXRnaAZa&%RcLZ_2kg5s4e2ia>(2d`pYUwJF8Ahz%W=RtyeR)VSz>@ccVTCK_SO4( zj~R4I<$ohJgpvb1A#_OLpUUcgSv4Xa9FN`1lI2y6!GGD+f7$bMWZ@Qv2Gc)e)W4+F zfBnhZ4j<@7YuAMBf0%~<^)LU$g5I4#2Z@PGxBV|~@DHlWTZ2RIwhAt5_`iP#FBTA> z(=_M)zgGT#Mu>IF7nnYFzAXgaNKPb>;!gbwYJ-y`v$Ls z2>Qw)1sDKIi` to run your selected problem. +- Tools like `get_logs`, `get_metrics`, and `get_traces` are available in this CLI. +- Use the `submit()` function in the console to test an agent submission. +""" + +WARNING = ( + "[bold yellow][WARNING][/bold yellow] Starting a new problem will " + "restart any running app. Make sure you finish working before you start." +) + +# (If you still want TASK_MESSAGE for problem context, you can re-enable it here.) + + +class HumanAgent: + def __init__(self, conductor: Conductor): + self.session = PromptSession() + self.console = Console(force_terminal=True, color_system="auto") + self.conductor = conductor + self.pids = self.conductor.problems.get_problem_ids() + self.completer = WordCompleter( + ["list", "options", "exit"] + [f"start {pid}" for pid in self.pids], + ignore_case=True, + match_middle=True, + sentence=True, + ) + self.session_purpose = None # "problem", "exit", etc. 
+
+    def display_welcome(self):
+        self.console.print(Markdown(WELCOME), justify="center")
+        self.console.print(Markdown(OPTIONS), justify="center")
+        self.console.print(WARNING)
+        self.console.print()
+
+    async def select_mode(self):
+        """Prompt until we get 'start ' or 'exit'."""
+        while True:
+            inp = await self._prompt()
+            cmd = inp.strip().split(maxsplit=1)
+            if cmd[0].lower() == "exit":
+                sys.exit(0)
+            if cmd[0].lower() == "options":
+                self.console.print(Markdown(OPTIONS), justify="center")
+                continue
+            if cmd[0].lower() == "start" and len(cmd) == 2:
+                pid = cmd[1]
+                if pid not in self.pids:
+                    self.console.print(f"[red]Unknown problem id: {pid}")
+                    continue
+                self.conductor.problem_id = pid
+                self.session_purpose = "problem"
+                return
+            self.console.print("[red]Invalid command. Type `options` to see choices.")
+
+    async def interactive_loop(self):
+        """Once problem is started, repeatedly shell or submit until done."""
+        env = ""
+        while self.conductor.submission_stage != "done":
+            # display last environment or grading response
+            if env:
+                print(env)
+
+            inp = await self._prompt()
+            text = inp.strip()
+
+            # shell command
+            if not text.startswith("submit("):
+                try:
+                    out = Shell.exec(text)
+                except Exception as e:
+                    out = f"[❌] Shell error: {e}"
+                env = out
+                continue
+
+            wrapped = f"```\n{text}\n```"
+            try:
+                resp = await self.conductor.submit(wrapped)
+            except Exception as e:
+                env = f"[❌] Grading error: {e}"
+            else:
+                env = resp
+
+        # final results panel
+        final = json.dumps(self.conductor.results, indent=2)
+        self.console.print(Panel(final, title="Final Results", style="bold green"))
+
+    async def _prompt(self) -> str:
+        loop = asyncio.get_running_loop()
+        style = Style.from_dict({"prompt": "ansigreen bold"})
+        prompt_txt = [("class:prompt", "SREGym> ")]
+        with patch_stdout():
+            try:
+                return await loop.run_in_executor(
+                    None,
+                    lambda: self.session.prompt(prompt_txt, style=style, completer=self.completer),
+                )
+            except (KeyboardInterrupt, EOFError):
+                sys.exit(0)
+
+
+def run_dashboard_server():
+    """Entry point for multiprocessing child: construct Dash in child process."""
+    # Silence child process stdout/stderr and noisy loggers
+    import logging
+    import os
+    import sys
+
+    try:
+        sys.stdout = open(os.devnull, "w")
+        sys.stderr = open(os.devnull, "w")
+    except Exception:
+        pass
+    server = SREGymDashboardServer(host="127.0.0.1", port=11451, debug=False)
+    server.run(threaded=False)
+
+
+async def main():
+    # set up the logger
+    """
+    logging.getLogger("sregym-global").setLevel(logging.INFO)
+    logging.getLogger("sregym-global").addHandler(LogProxy())
+
+    try:
+        set_start_method("spawn")
+    except RuntimeError:
+        pass
+
+    # Start dashboard in a separate process; construct server inside the child
+    p = Process(target=run_dashboard_server, daemon=True)
+    p.start()
+    """
+
+    init_logger()
+
+    """
+    import os, subprocess
+
+    dash_path = os.path.join(os.path.dirname(__file__), "dashboard", "dashboard_app.py")
+    dash_cmd = ["python3", dash_path]
+    env = {**os.environ, "PYTHONUNBUFFERED": "1"}
+
+    proc = subprocess.Popen(
+        dash_cmd,
+        stdout=subprocess.DEVNULL,  #
+        stderr=subprocess.DEVNULL,
+        env=env,
+    )
+
+    proc.terminate()
+    try:
+        proc.wait(timeout=10)
+    except subprocess.TimeoutExpired:
+        proc.kill()
+    """
+
+    conductor = Conductor()
+    agent = HumanAgent(conductor)
+    conductor.register_agent()  # no-op but for symmetry
+
+    # 1) Intro & pick a problem
+    agent.display_welcome()
+    await agent.select_mode()
+
+    # 2) Deploy environment & launch HTTP server
+    result = await conductor.start_problem()
+    while result != StartProblemResult.SUCCESS:
+        agent.console.print(
+            "[yellow]⏭️ This problem requires Khaos but cannot run on emulated clusters. "
+            "Please select another problem.[/yellow]"
+        )
+        await agent.select_mode()
+        result = await conductor.start_problem()
+
+    # 3) Interactive shell / submit loop
+    await agent.interactive_loop()
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/clients/__init__.py b/clients/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/clients/autosubmit/autosubmit_agent.py b/clients/autosubmit/autosubmit_agent.py
new file mode 100644
index 0000000..530e17a
--- /dev/null
+++ b/clients/autosubmit/autosubmit_agent.py
@@ -0,0 +1,31 @@
+import subprocess
+import sys
+import threading
+from time import sleep
+
+server_url = sys.argv[1] if len(sys.argv) > 1 else "http://localhost:8000"
+
+
+def automatic_submit():
+    ctr = 0
+    while ctr < 10000:
+        subprocess.run(
+            [
+                "curl",
+                "-X",
+                "POST",
+                "http://localhost:8000/submit",
+                "-H",
+                "Content-Type: application/json",
+                "-d",
+                '{"solution":"yes"}',
+            ],
+            capture_output=True,
+            text=True,
+        )
+        sleep(60)
+        ctr += 1
+
+
+if __name__ == "__main__":
+    automatic_submit()
diff --git a/clients/stratus/README.md b/clients/stratus/README.md
new file mode 100644
index 0000000..c50180f
--- /dev/null
+++ b/clients/stratus/README.md
@@ -0,0 +1,8 @@
+# Stratus
+This folder includes the implementation of the Stratus agent.
+
+The agent's main implementation is in the [stratus-agent](https://github.com/SREGym/SREGym/tree/main/clients/stratus/stratus_agent) directory.
+
+The agent's tooling support is in the [tools](https://github.com/SREGym/SREGym/tree/main/clients/stratus/tools) and [weak-oracles](https://github.com/SREGym/SREGym/tree/main/clients/stratus/weak_oracles) directory.
+
+The agent's prompts and configurations are in the [configs](https://github.com/SREGym/SREGym/tree/main/clients/stratus/configs) directory.
diff --git a/clients/stratus/__init__.py b/clients/stratus/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/clients/stratus/configs/diagnosis_agent_config.yaml b/clients/stratus/configs/diagnosis_agent_config.yaml
new file mode 100644
index 0000000..85ad9fc
--- /dev/null
+++ b/clients/stratus/configs/diagnosis_agent_config.yaml
@@ -0,0 +1,79 @@
+# each step is defined as one tool call
+max_step: 20
+# diagnosis agent has no synchronous tools
+sync_tools: null
+async_tools:
+  - name: get_traces
+    description: |
+      Get Jaeger traces for a given service in the last n minutes.
+
+      Args:
+        service (str): The name of the service for which to retrieve trace data.
+        last_n_minutes (int): The time range (in minutes) to look back from the current time.
+  - name: get_services
+    description: |
+      Retrieve the list of service names from the Grafana instance.
+
+      Args:
+
+      Returns:
+        List[str]: A list of service names available in Grafana.
+  - name: get_operations
+    description: |
+      Query available operations for a specific service from the Grafana instance.
+
+      Args:
+        service (str): The name of the service whose operations should be retrieved.
+
+      Returns:
+        List[str]: A list of operation names associated with the specified service.
+  - name: get_dependency_graph
+    description: |
+      Get service dependency graph from Jaeger's native dependencies API.
+      Args:
+        last_n_minutes (int): The time range (in minutes) to look back from the current time.
+      Returns:
+        str: JSON object representing the dependency graph.
+  - name: get_metrics
+    description: |
+      Query real-time metrics data from the Prometheus instance.
+
+      Args:
+        query (str): A Prometheus Query Language (PromQL) expression used to fetch metric values.
+
+      Returns:
+        dict: The raw Prometheus response containing metric results, including timestamps, values, and labels.
+  - name: exec_read_only_kubectl_cmd
+    description: |
+      This is a tool used to execute read-only kubectl commands.
+
+      Args:
+        command (str): The read-only kubectl command you want to execute in a CLI to manage a k8s cluster. It should start with "kubectl".
+      Available Read-only Commands:
+        "kubectl api-resources",
+        "kubectl api-version",
+        "kubectl auth can-i",
+        "kubectl cluster-info",
+        "kubectl describe",
+        "kubectl diff",
+        "kubectl events",
+        "kubectl explain",
+        "kubectl get",
+        "kubectl logs",
+        "kubectl options",
+        "kubectl top",
+        "kubectl version",
+        "kubectl config view",
+        "kubectl config current-context",
+        "kubectl config get",
+      INTERACTIVE COMMANDS ARE NOT SUPPORTED, e.g. "kubectl edit"
+      INTERACTIVE FLAGS ARE NOT SUPPORTED, e.g., "kubectl logs -f"
+  - name: submit_tool
+    description: |
+      The tool to submit benchmark results
+
+      Args:
+        ans (str): the answer you would like to submit to the benchmark. You should only use string "Yes" or "No" with quotes to indicate whether there are anomalies detected.
+prompts_path: diagnosis_agent_prompts.yaml
+
+
diff --git a/clients/stratus/configs/diagnosis_agent_prompts.yaml b/clients/stratus/configs/diagnosis_agent_prompts.yaml
new file mode 100644
index 0000000..3d9baba
--- /dev/null
+++ b/clients/stratus/configs/diagnosis_agent_prompts.yaml
@@ -0,0 +1,70 @@
+system: >
+  Monitor and diagnose an application consisting of **MANY** microservices. Some or none of the microservices have faults. Get all the pods and deployments to figure out what kind of services are running in the cluster.
+  Carefully identify the whether the faults are present and if they are, and identify what is the root cause of the fault.
+
+  Stop diagnosis once you've found the root cause of the faults.
+
+  Go as deep as you can into what is causing the issue.
+
+  Your instructions to the tools must be clear and concise.
+  Your queries to tools need to be single turn.
+
+  Remember to check these, and remember this information:
+  ## Workloads (Applications)
+  - **Pod**: The smallest deployable unit in Kubernetes, representing a single instance of a running application. Can contain one or more tightly coupled containers.
+  - **ReplicaSet**: Ensures that a specified number of pod replicas are running at all times. Often managed indirectly through Deployments.
+  - **Deployment**: Manages the deployment and lifecycle of applications. Provides declarative updates for Pods and ReplicaSets.
+  - **StatefulSet**: Manages stateful applications with unique pod identities and stable storage. Used for workloads like databases.
+  - **DaemonSet**: Ensures that a copy of a specific pod runs on every node in the cluster. Useful for node monitoring agents, log collectors, etc.
+  - **Job**: Manages batch processing tasks that are expected to complete successfully. Ensures pods run to completion.
+  - **CronJob**: Schedules jobs to run at specified times or intervals (similar to cron in Linux).
+
+  ## Networking
+  - **Service**: Provides a stable network endpoint for accessing a group of pods. Types: ClusterIP, NodePort, LoadBalancer, and ExternalName.
+  - **Ingress**: Manages external HTTP(S) access to services in the cluster.
Supports routing and load balancing for HTTP(S) traffic. + - **NetworkPolicy**: Defines rules for network communication between pods and other entities. Used for security and traffic control. + + ## Storage + - **PersistentVolume (PV)**: Represents a piece of storage in the cluster, provisioned by an administrator or dynamically. + - **PersistentVolumeClaim (PVC)**: Represents a request for storage by a user. Binds to a PersistentVolume. + - **StorageClass**: Defines different storage tiers or backends for dynamic provisioning of PersistentVolumes. + - **ConfigMap**: Stores configuration data as key-value pairs for applications. + - **Secret**: Stores sensitive data like passwords, tokens, or keys in an encrypted format. + + ## Configuration and Metadata + - **Namespace**: Logical partitioning of resources within the cluster for isolation and organization. + - **ConfigMap**: Provides non-sensitive configuration data in key-value format. + - **Secret**: Stores sensitive configuration data securely. + - **ResourceQuota**: Restricts resource usage (e.g., CPU, memory) within a namespace. + - **LimitRange**: Enforces minimum and maximum resource limits for containers in a namespace. + + ## Cluster Management + - **Node**: Represents a worker machine in the cluster (virtual or physical). Runs pods and is managed by the control plane. + - **ClusterRole and Role**: Define permissions for resources at the cluster or namespace level. + - **ClusterRoleBinding and RoleBinding**: Bind roles to users or groups for authorization. + - **ServiceAccount**: Associates processes in pods with permissions for accessing the Kubernetes API. + + After you finished, submit "Yes" to denote that there's an incident in the cluster. + Submit "No" to denote that there is no incidents identified. + + +user: > + You will be working this application: + + {app_name} + + Here are some descriptions about the application: + + {app_description} + + It belongs to this namespace: + + {app_namespace} + + In each round, there is a thinking stage. In the thinking stage, you are given a list of tools. Think about what you want to call. Return your tool choice and the reasoning behind + When choosing the tool, refer to the tool by its name. + Then, there is a tool-call stage, where you make a tool_call consistent with your explanation. + You can run up to {max_step} rounds to finish the tasks. + If you call submit_tool in tool-call stage, the process will end immediately. + If you exceed this limitation, the system will force you to make a submission. + You will begin by analyzing the service's state and telemetry with the tools. 
\ No newline at end of file diff --git a/clients/stratus/configs/langgraph_tool_configs.py b/clients/stratus/configs/langgraph_tool_configs.py new file mode 100644 index 0000000..d6fb157 --- /dev/null +++ b/clients/stratus/configs/langgraph_tool_configs.py @@ -0,0 +1,46 @@ +import os + +from dotenv import load_dotenv +from pydantic import BaseModel, Field + +load_dotenv() + + +# FIXME: name of class is misleading for now +class LanggraphToolConfig(BaseModel): + prometheus_mcp_url: str = Field( + description="url for prometheus mcp server", + default=f"http://localhost:{os.getenv('MCP_SERVER_PORT', '9954')}/prometheus/sse", + ) + jaeger_mcp_url: str = Field( + description="url for jaeger mcp server", + default=f"http://localhost:{os.getenv('MCP_SERVER_PORT', '9954')}/jaeger/sse", + ) + kubectl_mcp_url: str = Field( + description="url for kubectl mcp server", + default=f"http://localhost:{os.getenv('MCP_SERVER_PORT', '9954')}/kubectl_mcp_tools/sse", + ) + submit_mcp_url: str = Field( + description="url for submit mcp server", + default=f"http://localhost:{os.getenv('MCP_SERVER_PORT', '9954')}/submit/sse", + ) + benchmark_submit_url: str = Field( + description="url for the submission result destination, default to http://localhost:8000/submit", + default=f"http://localhost:{os.getenv("API_PORT", "8000")}/submit", + ) + benchmark_app_info_url: str = Field( + description="url for getting benchmark application information, default to http://localhost:8000/get_app", + default=f"http://localhost:{os.getenv("API_PORT", "8000")}/get_app", + ) + benchmark_current_problem: str = Field( + description="url for getting current benchmark problem, default to http://localhost:8000/get_problem", + default=f"http://localhost:{os.getenv("API_PORT", "8000")}/get_problem", + ) + + min_len_to_sum: int = Field( + description="Minimum length of text that will be summarized " "first before being input to the main agent.", + default=200, + ge=50, + ) + + use_summaries: bool = Field(description="Whether or not using summaries for too long texts.", default=True) diff --git a/clients/stratus/configs/llm_summarization_prompt.yaml b/clients/stratus/configs/llm_summarization_prompt.yaml new file mode 100644 index 0000000..93219b6 --- /dev/null +++ b/clients/stratus/configs/llm_summarization_prompt.yaml @@ -0,0 +1,37 @@ +mitigation_retry_prompt: | + Summarize a human-LLM (large language model) conversation by carefully analyzing the interaction, identifying both successes and failures before writing any summaries. For each identified point, select a direct excerpt from the message list that best illustrates or justifies your summary. Then, generate two lists: one of summaries describing what worked well in the conversation, and another of summaries describing what didn’t work or where the interaction was unsuccessful. + + Be sure to: + - First thoroughly review the entire message list and reason step-by-step to identify all relevant successes and failures before producing any summary points. + - For **each** bullet point in your lists, include a direct excerpt from the message(s) being referenced. Integrate the excerpt as a supporting quote within the summary bullet, clearly separating the summary from the quoted message. + - Consider the intentions of the human participant, how well the LLM addressed those goals, communication clarity, misunderstandings, helpfulness, responsiveness, and overall satisfaction. + - Continue analyzing the conversation until all notable successes and failures are identified. 
+ + Structure your output as follows: + - First a “What Worked” section, then a “What Didn’t Work” section. + - Each section should use a markdown header. + - Use bullet points for each item (2-5 per section). Each bullet point should be succinct (no more than two sentences). + - Explicitly include a representative excerpt from the conversation in every bullet (formatted as a quote, e.g., “User: ...” or “LLM: ...”). + + # Output Format + + - Use markdown headers: “What Worked” and “What Didn’t Work”. + - Present 2-5 bullet points per list, each containing both a concise summary and a quoted message excerpt from the conversation. + - Each bullet may have the structure: Brief summary sentence, followed by a colon and a quoted excerpt. + - Do not include any extraneous explanation or text. + + # Example + + ### What Worked + - The LLM accurately interpreted the user's request for a summary: “User: Could you summarize our discussion so far?” + - The conversation maintained a polite and friendly tone throughout: “LLM: Certainly, I’d be glad to help with that!” + + ### What Didn’t Work + - The LLM struggled with domain-specific questions, resulting in vague responses: “User: Can you explain how blockchain consensus works?” / “LLM: Blockchain has something to do with security and records, but I’m not sure.” + - An ambiguous prompt from the user led to confusion early in the exchange: “User: Give me one of those—you know what I mean.” + + _Reminder: Analyze the conversation deeply and include a relevant conversation excerpt with each summary bullet._ + +localization_summary_prompt: | + You are a helper agent to an autonomous SRE agent. + Summarize the agent's trace and output the potential fault location and causes. \ No newline at end of file diff --git a/clients/stratus/configs/localization_agent_config.yaml b/clients/stratus/configs/localization_agent_config.yaml new file mode 100644 index 0000000..783bfc8 --- /dev/null +++ b/clients/stratus/configs/localization_agent_config.yaml @@ -0,0 +1,89 @@ +# each step is defined as one tool call +max_step: 20 +# diagnosis agent has no synchronous tools +sync_tools: null +async_tools: + - name: get_traces + description: | + Get Jaeger traces for a given service in the last n minutes. + + Args: + service (str): The name of the service for which to retrieve trace data. + last_n_minutes (int): The time range (in minutes) to look back from the current time. + - name: get_services + description: | + Retrieve the list of service names from the Grafana instance. + + Args: + + Returns: + List[str]: A list of service names available in Grafana. + - name: get_operations + description: | + Query available operations for a specific service from the Grafana instance. + + Args: + service (str): The name of the service whose operations should be retrieved. + + Returns: + List[str]: A list of operation names associated with the specified service. + - name: get_dependency_graph + description: | + Get service dependency graph from Jaeger's native dependencies API. + Args: + last_n_minutes (int): The time range (in minutes) to look back from the current time. + Returns: + str: JSON object representing the dependency graph. + - name: get_metrics + description: | + Query real-time metrics data from the Prometheus instance. + + Args: + query (str): A Prometheus Query Language (PromQL) expression used to fetch metric values. + + Returns: + dict: The raw Prometheus response containing metric results, including timestamps, values, and labels. 
+ - name: exec_read_only_kubectl_cmd + description: | + This is a tool used to execute read-only kubectl commands. + + Args: + command (str): The read-only kubectl command you want to execute in a CLI to manage a k8s cluster. It should start with "kubectl". + Available Read-only Commands: + "kubectl api-resources", + "kubectl api-version", + "kubectl auth can-i", + "kubectl cluster-info", + "kubectl describe", + "kubectl diff", + "kubectl events", + "kubectl explain", + "kubectl get", + "kubectl logs", + "kubectl options", + "kubectl top", + "kubectl version", + "kubectl config view", + "kubectl config current-context", + "kubectl config get", + INTERACTIVE COMMANDS ARE NOT SUPPORTED, e.g. "kubectl edit" + INTERACTIVE FLAGS ARE NOT SUPPORTED, e.g., "kubectl logs -f" + - name: get_resource_uid + description: | + Use this tool to retrieve the UID of a specified resource. + + Args: + resource_type (str): The type of the resource (e.g., 'pod', 'service', 'deployment', 'statefulset', 'persistentvolumeclaim', 'persistentvolume', 'configmap', 'replicaset', 'memoryquota', 'ingress', 'networkpolicy' etc.). + resource_name (str): The name of the resource. + namespace (str): The namespace where the resource is located. + Returns: + str: The UID of the specified resource. + - name: submit_tool + description: | + The tool to submit benchmark results + + Args: + ans (str): the answer you would like to submit to the benchmark. Use natural language to describe the root cause of the failure. + +prompts_path: localization_agent_prompts.yaml + diff --git a/clients/stratus/configs/localization_agent_prompts.yaml b/clients/stratus/configs/localization_agent_prompts.yaml new file mode 100644 index 0000000..a3e488f --- /dev/null +++ b/clients/stratus/configs/localization_agent_prompts.yaml @@ -0,0 +1,73 @@ +system: > + Monitor and diagnose an application consisting of **MANY** microservices. Some or none of the microservices have faults. Get all the pods and deployments to figure out what kind of services are running in the cluster. + Carefully identify the whether the faults are present and if they are, and identify what is the root cause of the fault. + + Stop diagnosis once you've found the root cause of the faults. + + Go as deep as you can into what is causing the issue. + + Your instructions to the tools must be clear and concise. + Your queries to tools need to be single turn. + + Remember to check these, and remember this information: + ## Workloads (Applications) + - **Pod**: The smallest deployable unit in Kubernetes, representing a single instance of a running application. Can contain one or more tightly coupled containers. + - **ReplicaSet**: Ensures that a specified number of pod replicas are running at all times. Often managed indirectly through Deployments. + - **Deployment**: Manages the deployment and lifecycle of applications. Provides declarative updates for Pods and ReplicaSets. + - **StatefulSet**: Manages stateful applications with unique pod identities and stable storage. Used for workloads like databases. + - **DaemonSet**: Ensures that a copy of a specific pod runs on every node in the cluster. Useful for node monitoring agents, log collectors, etc. + - **Job**: Manages batch processing tasks that are expected to complete successfully. Ensures pods run to completion. + - **CronJob**: Schedules jobs to run at specified times or intervals (similar to cron in Linux). + + ## Networking + - **Service**: Provides a stable network endpoint for accessing a group of pods. 
Types: ClusterIP, NodePort, LoadBalancer, and ExternalName. + - **Ingress**: Manages external HTTP(S) access to services in the cluster. Supports routing and load balancing for HTTP(S) traffic. + - **NetworkPolicy**: Defines rules for network communication between pods and other entities. Used for security and traffic control. + + ## Storage + - **PersistentVolume (PV)**: Represents a piece of storage in the cluster, provisioned by an administrator or dynamically. + - **PersistentVolumeClaim (PVC)**: Represents a request for storage by a user. Binds to a PersistentVolume. + - **StorageClass**: Defines different storage tiers or backends for dynamic provisioning of PersistentVolumes. + - **ConfigMap**: Stores configuration data as key-value pairs for applications. + - **Secret**: Stores sensitive data like passwords, tokens, or keys in an encrypted format. + + ## Configuration and Metadata + - **Namespace**: Logical partitioning of resources within the cluster for isolation and organization. + - **ConfigMap**: Provides non-sensitive configuration data in key-value format. + - **Secret**: Stores sensitive configuration data securely. + - **ResourceQuota**: Restricts resource usage (e.g., CPU, memory) within a namespace. + - **LimitRange**: Enforces minimum and maximum resource limits for containers in a namespace. + + ## Cluster Management + - **Node**: Represents a worker machine in the cluster (virtual or physical). Runs pods and is managed by the control plane. + - **ClusterRole and Role**: Define permissions for resources at the cluster or namespace level. + - **ClusterRoleBinding and RoleBinding**: Bind roles to users or groups for authorization. + - **ServiceAccount**: Associates processes in pods with permissions for accessing the Kubernetes API. + + After you finished, submit the detected faulty components to the orchestrator for evaluation. + The submission should be a natural language description of the root cause of the failure. + +user: > + You will be working this application: + + {app_name} + + Here are some descriptions about the application: + + {app_description} + + It belongs to this namespace: + + {app_namespace} + + In each round, there is a thinking stage. In the thinking stage, you are given a list of tools. Think about what you want to call. Return your tool choice and the reasoning behind + When choosing the tool, refer to the tool by its name. + Then, there is a tool-call stage, where you make a tool_call consistent with your explanation. + You can run up to {max_step} rounds to finish the tasks. + If you call submit_tool in tool-call stage, the process will end immediately. + If you exceed this limitation, the system will force you to make a submission. + You will begin by analyzing the service's state and telemetry with the tools. + +localization_summary_prompt: | + You are a helper agent to an autonomous SRE agent. + Summarize the agent's trace and output the potential fault location and causes. \ No newline at end of file diff --git a/clients/stratus/configs/mitigation_agent_config.yaml b/clients/stratus/configs/mitigation_agent_config.yaml new file mode 100644 index 0000000..192e217 --- /dev/null +++ b/clients/stratus/configs/mitigation_agent_config.yaml @@ -0,0 +1,74 @@ +# each step is defined as one tool call +max_step: 20 +max_retry_attempts: 10 +# choose from: naive, validate, none +retry_mode: validate +sync_tools: + - name: wait_tool + description: | + Use this tool to wait for you action to take effect. The upper limit is 120 seconds. 
+ Any value above 120 seconds will be truncated to 120 seconds. If you call this tool + along with other tools in your tool_calls list, this tool will be scheduled to the + last for execution. + + Args: + seconds (int): Number of seconds to wait. +async_tools: + - name: get_traces + description: | + Get Jaeger traces for a given service in the last n minutes. + + Args: + service (str): The name of the service for which to retrieve trace data. + last_n_minutes (int): The time range (in minutes) to look back from the current time. + - name: get_services + description: | + Retrieve the list of service names from the Grafana instance. + + Args: + + Returns: + List[str]: A list of service names available in Grafana. + - name: get_operations + description: | + Query available operations for a specific service from the Grafana instance. + + Args: + service (str): The name of the service whose operations should be retrieved. + + Returns: + List[str]: A list of operation names associated with the specified service. + - name: get_dependency_graph + description: | + Get service dependency graph from Jaeger's native dependencies API. + Args: + last_n_minutes (int): The time range (in minutes) to look back from the current time. + Returns: + str: JSON object representing the dependency graph. + - name: get_metrics + description: | + Query real-time metrics data from the Prometheus instance. + + Args: + query (str): A Prometheus Query Language (PromQL) expression used to fetch metric values. + + Returns: + dict: The raw Prometheus response containing metric results, including timestamps, values, and labels. + - name: exec_kubectl_cmd_safely + description: | + This is a tool used to safely execute kubectl commands. Whatever commands you executed will be stored in a rollback + mechanism which later is used to roll back the cluster state in case your current attempt fails. + + Args: + command (str): The kubectl command you would like to execute, must start with "kubectl" + INTERACTIVE COMMANDS ARE NOT SUPPORTED, e.g. "kubectl edit" + INTERACTIVE FLAGS ARE NOT SUPPORTED, e.g., "kubectl logs -f" + - name: f_submit_tool + description: | + The tool to submit benchmark results + + Args: + ans (str): the answer you would like to submit to the benchmark +prompts_path: mitigation_agent_prompts.yaml + + diff --git a/clients/stratus/configs/mitigation_agent_prompts.yaml b/clients/stratus/configs/mitigation_agent_prompts.yaml new file mode 100644 index 0000000..7488c57 --- /dev/null +++ b/clients/stratus/configs/mitigation_agent_prompts.yaml @@ -0,0 +1,96 @@ +system: > + Mitigate the identified faults in an IT incident. + Some or none of the microservices have faults. + Get all the pods and deployments to figure out what kind of services are running in the cluster if you don't know what the services are. + You should carefully identify the whether the faults are present and if they are, what is the root cause of the fault. + You can stop mitigation once you've fixed all the faults. + + Go as deep as you can into what is causing the issue, and mitigate the fault. + + Your instructions to the tools must be clear and concise. + Your queries to tools need to be single turn. + + Remember to check these, and remember this information: + ## Workloads (Applications) + - **Pod**: The smallest deployable unit in Kubernetes, representing a single instance of a running application. Can contain one or more tightly coupled containers. + - **ReplicaSet**: Ensures that a specified number of pod replicas are running at all times. 
Often managed indirectly through Deployments. + - **Deployment**: Manages the deployment and lifecycle of applications. Provides declarative updates for Pods and ReplicaSets. + - **StatefulSet**: Manages stateful applications with unique pod identities and stable storage. Used for workloads like databases. + - **DaemonSet**: Ensures that a copy of a specific pod runs on every node in the cluster. Useful for node monitoring agents, log collectors, etc. + - **Job**: Manages batch processing tasks that are expected to complete successfully. Ensures pods run to completion. + - **CronJob**: Schedules jobs to run at specified times or intervals (similar to cron in Linux). + + ## Networking + - **Service**: Provides a stable network endpoint for accessing a group of pods. Types: ClusterIP, NodePort, LoadBalancer, and ExternalName. + - **Ingress**: Manages external HTTP(S) access to services in the cluster. Supports routing and load balancing for HTTP(S) traffic. + - **NetworkPolicy**: Defines rules for network communication between pods and other entities. Used for security and traffic control. + + ## Storage + - **PersistentVolume (PV)**: Represents a piece of storage in the cluster, provisioned by an administrator or dynamically. + - **PersistentVolumeClaim (PVC)**: Represents a request for storage by a user. Binds to a PersistentVolume. + - **StorageClass**: Defines different storage tiers or backends for dynamic provisioning of PersistentVolumes. + - **ConfigMap**: Stores configuration data as key-value pairs for applications. + - **Secret**: Stores sensitive data like passwords, tokens, or keys in an encrypted format. + + ## Configuration and Metadata + - **Namespace**: Logical partitioning of resources within the cluster for isolation and organization. + - **ConfigMap**: Provides non-sensitive configuration data in key-value format. + - **Secret**: Stores sensitive configuration data securely. + - **ResourceQuota**: Restricts resource usage (e.g., CPU, memory) within a namespace. + - **LimitRange**: Enforces minimum and maximum resource limits for containers in a namespace. + + ## Cluster Management + - **Node**: Represents a worker machine in the cluster (virtual or physical). Runs pods and is managed by the control plane. + - **ClusterRole and Role**: Define permissions for resources at the cluster or namespace level. + - **ClusterRoleBinding and RoleBinding**: Bind roles to users or groups for authorization. + - **ServiceAccount**: Associates processes in pods with permissions for accessing the Kubernetes API. + + An example procedure to remediate the faults: + 1) Formulate a remediation plan with a list of actionable steps. + 2) Execute the plan, one step at a time. + 3) Check if the plan execution worked as you desired in the IT environment. + 4) If not, you can either call wait_tool to wait for it to take effect or take other actions. + 5) Otherwise, continue the plan and execution process until you call submit_tool as you believe the application has become healthy. + + The following is a detailed description of your tasks. + + 1) mitigation: Mitigate the identified faults in an IT incident with the provided tools. You can submit an empty dict "ans" with the submit_tool + as this task is not graded over your answer but the final result of the mitigation; therefore, you have to make sure the + application has become healthy before you call submit_tool. 
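The `exec_kubectl_cmd_safely` tool described above promises that every mutating command is stored in a rollback mechanism so a failed attempt can be undone later. A minimal, purely illustrative sketch of that record-and-undo idea is shown below; the `RollbackStack` class and the explicit `undo_command` argument are hypothetical and do not mirror the repository's actual implementation, which records executed commands automatically.

```python
# Illustrative only: a toy version of "execute safely, remember how to undo".
# Names here are hypothetical, not the repository's API.
import subprocess


class RollbackStack:
    """Remembers undo commands so a failed mitigation attempt can be reverted in reverse order."""

    def __init__(self) -> None:
        self._undo_cmds: list[str] = []

    def record(self, undo_cmd: str) -> None:
        self._undo_cmds.append(undo_cmd)

    def rollback_all(self) -> None:
        # Undo in the reverse order of execution, matching the rollback tool's contract.
        while self._undo_cmds:
            subprocess.run(self._undo_cmds.pop().split(), check=False)


def exec_kubectl_cmd_safely(command: str, undo_command: str, stack: RollbackStack) -> str:
    """Run a kubectl command and record how to undo it."""
    if not command.startswith("kubectl"):
        raise ValueError("command must start with 'kubectl'")
    result = subprocess.run(command.split(), capture_output=True, text=True)
    stack.record(undo_command)
    return result.stdout or result.stderr
```

In this sketch the caller supplies the inverse command explicitly to keep the example short; the tool description above implies the real mechanism stores the executed commands itself, which is what allows a separate rollback agent to undo changes it did not make.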
+
+user: |
+  You will be working with this application:
+
+  {app_name}
+
+  Here are some descriptions about the application:
+
+  {app_description}
+
+  It belongs to this namespace:
+
+  {app_namespace}
+
+  The following is the information of faults identified by a diagnosis agent in the app:
+
+  {faults_info}
+
+  In each round, there is a thinking stage. In the thinking stage, you are given a list of tools. Think about what you want to call. Return your tool choice and the reasoning behind it.
+  When choosing the tool, refer to the tool by its name.
+  Then, there is a tool-call stage, where you make a tool_call consistent with your explanation.
+  You can run up to {max_step} rounds to finish the tasks.
+  If you call submit_tool in the tool-call stage, the process will end immediately.
+  If you exceed this limitation, the system will force you to make a submission.
+  You will begin by analyzing the service's state and telemetry with the tools.
+
+retry_user: |
+  The result from the last attempt of mitigation is as follows:
+
+  {last_result}
+
+  There are some reflections from the previous run:
+
+  {reflection}
+
+  Next, use the provided tools to mitigate the faults.
+  It is a good habit to verify the information about the faults before you take any actions for mitigation.
\ No newline at end of file
diff --git a/clients/stratus/configs/rollback_agent_config.yaml b/clients/stratus/configs/rollback_agent_config.yaml
new file mode 100644
index 0000000..4194add
--- /dev/null
+++ b/clients/stratus/configs/rollback_agent_config.yaml
@@ -0,0 +1,24 @@
+# each step is defined as one tool call
+max_step: 20
+sync_tools:
+  - name: wait_tool
+    description: |
+      Use this tool to wait for your action to take effect. The upper limit is 120 seconds.
+      Any value above 120 seconds will be truncated to 120 seconds. If you call this tool
+      along with other tools in your tool_calls list, this tool will be scheduled to the
+      last for execution.
+
+      Args:
+          seconds (int): Number of seconds to wait.
+async_tools:
+  - name: rollback_command
+    description: |
+      Use this function to roll back the last kubectl command you observed.
+      When you call this tool multiple times, you will roll back previous commands in the reverse order of their prior execution.
+  - name: get_previous_rollbackable_cmd
+    description: |
+      Use this function to get a list of commands you previously executed that can be rolled back.
+  - name: r_submit_tool
+    description: |
+      The tool to submit after you have rolled back all the changes.
+prompts_path: rollback_agent_prompts.yaml
\ No newline at end of file
diff --git a/clients/stratus/configs/rollback_agent_prompts.yaml b/clients/stratus/configs/rollback_agent_prompts.yaml
new file mode 100644
index 0000000..a12ce20
--- /dev/null
+++ b/clients/stratus/configs/rollback_agent_prompts.yaml
@@ -0,0 +1,28 @@
+system: >
+  You are a smart and capable tool-using agent. You are an expert at remediating faults
+  in IT environments. You have extensive experience with Kubernetes and SRE tools.
+
+  Your responsibility is to roll back the changes that were made to the system to mitigate the
+  identified faults in an IT incident. You are given a rollback tool to help you roll back the system.
+
+  You are given a system and a rollback tool. The system is either in an initial faulty state,
+  or in the state you left it incompletely repaired after the previous round of mitigation.
+ You need to rollback the system to the initial state since the operations you performed in + the previous round may negatively impact your current round of mitigation. + + The following is a detailed description of your tasks. + + 1) rollback: You should run the rollback tool as many times as you can, since this rollback tool + undos the last operation performed on the system. You should run the rollback tool until it returns + that there are no more operations to undo. If you believe you've finished the task, you should submit + an empty dict "ans" with the submit_tool to trigger the end of the task. + + +user: > + In each round, there is a thinking stage. In the thinking stage, you are given a list of tools. Think about what you want to call. Return your tool choice and the reasoning behind. + When choosing the tool, refer to the tool by its name. + Then, there is a tool-call stage, where you make a tool_call consistent with your explanation. + You can run up to {max_step} rounds to finish the tasks. + If you call submit_tool in tool-call stage, the process will end immediately. + If you exceed this limitation, the system will force you to make a submission. + You will begin by analyzing the service's state and telemetry with the tools. \ No newline at end of file diff --git a/clients/stratus/configs/stratus_config.py b/clients/stratus/configs/stratus_config.py new file mode 100644 index 0000000..a5eb720 --- /dev/null +++ b/clients/stratus/configs/stratus_config.py @@ -0,0 +1,28 @@ +import logging +from pathlib import Path + +from dotenv import load_dotenv +from pydantic import BaseModel, Field + +logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") +logger = logging.getLogger(__name__) + +parent_dir = Path(__file__).resolve().parent + +load_dotenv() + + +class BaseAgentCfg(BaseModel): + max_round: int = Field(description="maximum rounds allowed for tool calling", gt=0) + + prompts_file_path: str = Field( + description="prompts used for diagnosis agent", + ) + + sync_tools: list[str] = Field( + description="provided sync tools for the agent", + ) + + async_tools: list[str] = Field( + description="provided async tools for the agent", + ) diff --git a/clients/stratus/llm_backend/__init__.py b/clients/stratus/llm_backend/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/clients/stratus/llm_backend/get_llm_backend.py b/clients/stratus/llm_backend/get_llm_backend.py new file mode 100644 index 0000000..982e048 --- /dev/null +++ b/clients/stratus/llm_backend/get_llm_backend.py @@ -0,0 +1,353 @@ +"""Adopted from previous project""" + +import json +import logging +import os +import time +from typing import Any, Dict, Optional + +import litellm +import openai +from dotenv import load_dotenv +from langchain_core.messages import AIMessage, HumanMessage, SystemMessage +from langchain_google_genai import ChatGoogleGenerativeAI +from langchain_ibm import ChatWatsonx +from langchain_litellm import ChatLiteLLM +from langchain_openai import ChatOpenAI +from litellm.utils import trim_messages +from requests.exceptions import HTTPError + +from clients.stratus.llm_backend.trim_util import trim_messages_conservative + +logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") +logger = logging.getLogger(__name__) + +load_dotenv() + +LLM_QUERY_MAX_RETRIES = int(os.getenv("LLM_QUERY_MAX_RETRIES", "5")) # Maximum number of retries for rate-limiting +LLM_QUERY_INIT_RETRY_DELAY = 
int(os.getenv("LLM_QUERY_INIT_RETRY_DELAY", "1")) # Initial delay in seconds + + +class LiteLLMBackend: + + def __init__( + self, + provider: str, + model_name: str, + url: str, + api_key: str, + api_version: str, + seed: int, + top_p: float, + temperature: float, + reasoning_effort: str, + thinking_tools: str, + thinking_budget_tools: int, + max_tokens: int, + extra_headers: Optional[Dict[str, str]] = None, + ): + self.provider = provider + self.model_name = model_name + self.url = url + self.api_key = api_key + self.api_version = api_version + self.temperature = temperature + self.seed = seed + self.top_p = top_p + self.reasoning_effort = reasoning_effort + self.thinking_tools = thinking_tools + self.thinking_budget_tools = thinking_budget_tools + self.max_tokens = max_tokens + self.extra_headers = extra_headers + litellm.drop_params = True + litellm.modify_params = True # for Anthropic + + def inference( + self, + messages: str | list[SystemMessage | HumanMessage | AIMessage], + system_prompt: Optional[str] = None, + tools: Optional[list[any]] = None, + ): + if isinstance(messages, str): + # logger.info(f"NL input as str received: {messages}") + # FIXME: This should be deprecated as it does not contain prior history of chat. + # We are building new agents on langgraph, which will change how messages are + # composed. + if system_prompt is None: + logger.info("No system prompt provided. Using default system prompt.") + system_prompt = "You are a helpful assistant." + prompt_messages = [ + SystemMessage(content=system_prompt), + HumanMessage(content=messages), + ] + elif isinstance(messages, list): + prompt_messages = messages + if len(messages) == 0: + arena_logger = logging.getLogger("sregym-global") + arena_logger.info("[ERROR] Canary died!") + elif isinstance(messages[0], HumanMessage): + # logger.info("No system message provided.") + system_message = SystemMessage(content="You are a helpful assistant.") + if system_prompt is None: + logger.warning("No system prompt provided. 
Using default system prompt.") + else: + # logger.info("Using system prompt provided.") + system_message.content = system_prompt + # logger.info(f"inserting [{system_message}] at the beginning of messages") + prompt_messages.insert(0, system_message) + arena_logger = logging.getLogger("sregym-global") + arena_logger.info(f"[PROMPT] (inserted system prompt at the beginning) \n {system_message}") + else: + raise ValueError(f"messages must be either a string or a list of dicts, but got {type(messages)}") + + if self.provider == "openai": + # Some models (o1, o3, gpt-5) don't support top_p and temperature + model_config = { + "model": self.model_name, + } + # Only add temperature and top_p for models that support them + # Reasoning models (o1, o3) and newer models (gpt-5) don't support these params + if not any(prefix in self.model_name.lower() for prefix in ["o1", "o3", "gpt-5"]): + model_config["temperature"] = self.temperature + model_config["top_p"] = self.top_p + llm = ChatOpenAI(**model_config) + elif self.provider == "watsonx": + llm = ChatWatsonx( + model_id=self.model_name, + url=self.url, + project_id=os.environ["WX_PROJECT_ID"], + apikey=self.api_key, + temperature=self.temperature, + ) + # FIXME: openai client is broken, but we can just use LiteLLM to use openai + elif self.provider == "openai": + llm = ChatLiteLLM( + model=self.model_name, + temperature=self.temperature, + top_p=self.top_p, + api_key=self.api_key, + ) + elif self.provider == "litellm": + llm = ChatLiteLLM( + model=self.model_name, + temperature=self.temperature, + top_p=self.top_p, + ) + elif self.provider == "compatible": + llm = ChatLiteLLM( + model=self.model_name, + temperature=self.temperature, + top_p=self.top_p, + api_key=self.api_key, + api_base=self.url, + ) + else: + raise ValueError(f"Unsupported provider: {self.provider}") + + if tools: + # logger.info(f"binding tools to llm: {tools}") + llm = llm.bind_tools(tools, tool_choice="auto") + + # FIXME: when using openai models, finish_reason would be the function name + # if the model decides to do function calling + # TODO: check how does function call looks like in langchain + + # Retry logic for rate-limiting + retry_delay = LLM_QUERY_INIT_RETRY_DELAY + trim_message = False + + for attempt in range(LLM_QUERY_MAX_RETRIES): + try: + # trim the first ten message who are AI messages and user messages + if trim_message: + arena_logger = logging.getLogger("sregym-global") + new_prompt_messages, trim_sum = trim_messages_conservative(prompt_messages) + arena_logger.info(f"[WARNING] Trimming the {trim_sum}/{len(prompt_messages)} messages") + prompt_messages = new_prompt_messages + completion = llm.invoke(input=prompt_messages) + # logger.info(f">>> llm response: {completion}") + return completion + except openai.BadRequestError as e: + # BadRequestError indicates malformed request (e.g., missing tool responses) + # Don't retry as the request itself is invalid + logger.error(f"Bad request error - request is malformed: {e}") + logger.error(f"Error details: {e.response.json() if hasattr(e, 'response') else 'No response details'}") + logger.error("This often happens when tool_calls don't have matching tool response messages.") + logger.error( + f"Last few messages: {prompt_messages[-3:] if len(prompt_messages) >= 3 else prompt_messages}" + ) + raise + except (openai.RateLimitError, HTTPError) as e: + # Rate-limiting errors - retry with exponential backoff + logger.warning( + f"Rate-limited. Retrying in {retry_delay} seconds... 
(Attempt {attempt + 1}/{LLM_QUERY_MAX_RETRIES})" + ) + + arena_logger = logging.getLogger("sregym-global") + arena_logger.info( + f"[WARNING] HTTP error occurred: {e}. Retrying in {retry_delay} seconds... (Attempt {attempt + 1}/{LLM_QUERY_MAX_RETRIES})" + ) + time.sleep(retry_delay) + retry_delay *= 2 # Exponential backoff + except openai.APIError as e: + # Other OpenAI API errors + logger.error(f"OpenAI API error occurred: {e}") + raise + # else: + # logger.error(f"HTTP error occurred: {e}") + # raise + + except litellm.RateLimitError as e: + provider_delay = _extract_retry_delay_seconds_from_exception(e) + if provider_delay is not None and provider_delay > 0: + arena_logger = logging.getLogger("sregym-global") + arena_logger.info( + f"[WARNING] Rate-limited by provider. Retrying in {provider_delay} seconds... (Attempt {attempt + 1}/{LLM_QUERY_MAX_RETRIES})" + ) + time.sleep(provider_delay) + else: # actually this fallback should not happen + arena_logger = logging.getLogger("sregym-global") + arena_logger.info( + f"Rate-limited. Retrying in {retry_delay} seconds... (Attempt {attempt + 1}/{LLM_QUERY_MAX_RETRIES})" + ) + time.sleep(retry_delay) + retry_delay *= 2 # Exponential backoff + + trim_message = True # reduce overhead + except litellm.ServiceUnavailableError as e: # 503 + arena_logger = logging.getLogger("sregym-global") + arena_logger.info( + f"[WARNING] Service unavailable (mostly 503). Retrying in 60 seconds... (Attempt {attempt + 1}/{LLM_QUERY_MAX_RETRIES})" + ) + time.sleep(60) + trim_message = True # reduce overhead + except IndexError as e: + arena_logger = logging.getLogger("sregym-global") + arena_logger.info( + f"[ERROR] IndexError occurred on Gemini Server Side: {e}, keep calm for a while... {attempt + 1}/{LLM_QUERY_MAX_RETRIES}" + ) + trim_message = True + time.sleep(30) + if attempt == LLM_QUERY_MAX_RETRIES - 1: + arena_logger = logging.getLogger("sregym-global") + arena_logger.info( + f"[WARNING] Max retries exceeded due to index error. Unable to complete the request." + ) + # return an error + return AIMessage(content="Server side error") + except Exception as e: + logger.error(f"An unexpected error occurred: {e}") + raise + + raise RuntimeError("Max retries exceeded. Unable to complete the request.") + + +def _parse_duration_to_seconds(duration: Any) -> Optional[float]: + """Convert duration to seconds. + + Supports: + - string like "56s" or "56.374s" + - dict with {"seconds": int, "nanos": int} + - numeric seconds + """ + if duration is None: + return None + if isinstance(duration, (int, float)): + return float(duration) + if isinstance(duration, str): + val = duration.strip().lower() + if val.endswith("s"): + try: + return float(val[:-1]) + except ValueError: + return None + return None + if isinstance(duration, dict): + seconds = duration.get("seconds") + nanos = duration.get("nanos", 0) + if isinstance(seconds, (int, float)): + return float(seconds) + (float(nanos) / 1_000_000_000.0) + return None + + +def _extract_retry_delay_seconds_from_exception(exc: BaseException) -> Optional[float]: + """Extract retry delay seconds from JSON details RetryInfo only. + + Returns 60.0 if no RetryInfo found in error details. 
+ """ + candidates: list[Any] = [] + + print(f"exc: {exc}") + + # response.json() or response.text + response = getattr(exc, "response", None) + if response is not None: + try: + if hasattr(response, "json"): + candidates.append(response.json()) + except Exception: + pass + try: + text = getattr(response, "text", None) + if isinstance(text, (str, bytes)): + candidates.append(json.loads(text)) + except Exception: + pass + + # message/body/content attributes + for attr in ("body", "message", "content"): + try: + val = getattr(exc, attr, None) + if isinstance(val, (dict, list)): + candidates.append(val) + elif isinstance(val, (str, bytes)): + candidates.append(json.loads(val)) + except Exception: + pass + + # args may contain dict/JSON strings + try: + for arg in getattr(exc, "args", []) or []: + if isinstance(arg, (dict, list)): + candidates.append(arg) + elif isinstance(arg, (str, bytes)): + try: + candidates.append(json.loads(arg)) + except Exception: + pass + except Exception: + pass + + def find_retry_delay(data: Any) -> Optional[float]: + if data is None: + return None + if isinstance(data, dict): + # Error envelope {"error": {...}} + if "error" in data: + found = find_retry_delay(data.get("error")) + if found is not None: + return found + # Google RPC details list + details = data.get("details") + if isinstance(details, list): + for item in details: + if isinstance(item, dict): + type_url = item.get("@type") or item.get("type") + if type_url and "google.rpc.RetryInfo" in type_url: + parsed = _parse_duration_to_seconds(item.get("retryDelay")) + if parsed is not None: + return parsed + elif isinstance(data, list): + for v in data: + found = find_retry_delay(v) + if found is not None: + return found + return None + + for cand in candidates: + delay = find_retry_delay(cand) + if delay is not None: + return delay + + # Default to 60 seconds if no RetryInfo found + return 60.0 diff --git a/clients/stratus/llm_backend/init_backend.py b/clients/stratus/llm_backend/init_backend.py new file mode 100644 index 0000000..eb983f2 --- /dev/null +++ b/clients/stratus/llm_backend/init_backend.py @@ -0,0 +1,177 @@ +"""Adopted from previous project""" + +import os + +from dotenv import load_dotenv + +from clients.stratus.llm_backend.get_llm_backend import LiteLLMBackend + +load_dotenv() + +global PROVIDER_TOOLS, MODEL_TOOLS, URL_TOOLS, API_VERSION_TOOLS, API_KEY_TOOLS, REASONING_EFFORT_TOOLS, SEED_TOOLS, TOP_P_TOOLS, TEMPERATURE_TOOLS, THINKING_TOOLS, THINKING_BUDGET_TOOLS, MAX_TOKENS_TOOLS + +try: + PROVIDER = os.environ["PROVIDER"] +except KeyError: + PROVIDER = "openai" + print("Unable to find environment variable - PROVIDER, setting to openai...") + raise + +try: + PROVIDER_TOOLS = os.environ["PROVIDER_TOOLS"] +except KeyError: + PROVIDER_TOOLS = "" + print("Unable to find environment variable - PROVIDER_TOOLS.") + raise + +try: + MODEL_TOOLS = os.environ["MODEL_TOOLS"] +except KeyError: + MODEL_TOOLS = "" + print("Unable to find environment variable - MODEL_TOOLS.") + raise + +try: + URL_TOOLS = os.environ["URL_TOOLS"].rstrip("/") +except KeyError: + URL_TOOLS = "" + print("Unable to find environment variable, leave it empty - URL_TOOLS.") + +try: + API_KEY_TOOLS = os.environ["API_KEY_TOOLS"] + # os.environ["OPENAI_API_KEY"] = API_KEY_TOOLS # should not use this fallback +except KeyError: + API_KEY_TOOLS = "" + print("Unable to find environment variable, leave it empty - API_KEY_TOOLS.") + +try: + SEED_TOOLS = int(os.environ["SEED_TOOLS"]) +except KeyError: + SEED_TOOLS = 10 + print(f"Unable to find 
environment variable - SEED_TOOLS. Defaulting to {SEED_TOOLS}.") + +try: + TOP_P_TOOLS = float(os.environ["TOP_P_TOOLS"]) +except KeyError: + TOP_P_TOOLS = 0.95 + print(f"Unable to find environment variable - TOP_P_TOOLS. Defaulting to {TOP_P_TOOLS}.") + +try: + TEMPERATURE_TOOLS = float(os.environ["TEMPERATURE_TOOLS"]) +except KeyError: + TEMPERATURE_TOOLS = 0.0 + print(f"Unable to find environment variable - TEMPERATURE_TOOLS. Defaulting to {TEMPERATURE_TOOLS}.") +except ValueError as e: + print("Incorrect TEMPERATURE_TOOLS value:", e) + raise + +try: + REASONING_EFFORT_TOOLS = str(os.environ["REASONING_EFFORT_TOOLS"]).lower() +except KeyError: + REASONING_EFFORT_TOOLS = "" + print(f"Unable to find environment variable - REASONING_EFFORT_TOOLS. Setting to {REASONING_EFFORT_TOOLS}.") + +try: + API_VERSION_TOOLS = os.environ["API_VERSION_TOOLS"] +except KeyError: + API_VERSION_TOOLS = "" + print(f"Unable to find environment variable - API_VERSION_TOOLS. Setting to {API_VERSION_TOOLS}.") + +try: + THINKING_TOOLS = os.environ["THINKING_TOOLS"] +except KeyError: + THINKING_TOOLS = "" + print(f"Unable to find environment variable - THINKING_TOOLS. Setting to {THINKING_TOOLS}.") + +try: + WX_PROJECT_ID = os.environ["WX_PROJECT_ID"] +except KeyError: + WX_PROJECT_ID = "" + print(f"Unable to find environment variable - WX_PROJECT_ID. Setting to {WX_PROJECT_ID}.") + +try: + WATSONX_API_BASE = os.environ["WATSONX_API_BASE"] +except KeyError: + WATSONX_API_BASE = "https://us-south.ml.cloud.ibm.com" + print(f"Unable to find environment variable - WATSONX_API_BASE. Setting to {WATSONX_API_BASE}.") + + +try: + THINKING_BUDGET_TOOLS = int(os.environ["THINKING_BUDGET_TOOLS"]) +except KeyError: + THINKING_BUDGET_TOOLS = 16000 + print(f"Unable to find environment variable - THINKING_BUDGET_TOOLS. Setting to {THINKING_BUDGET_TOOLS}.") + +try: + MAX_TOKENS_TOOLS = int(os.environ["MAX_TOKENS_TOOLS"]) +except KeyError: + MAX_TOKENS_TOOLS = 16000 + print(f"Unable to find environment variable - MAX_TOKENS_TOOLS. Setting to {MAX_TOKENS_TOOLS}.") + + +def get_llm_backend_for_tools(): + if PROVIDER == "watsonx": + try: + WATSONX_API_KEY = os.environ["WATSONX_API_KEY"] + except KeyError: + print(f"Unable to find environment variable - WATSONX_API_KEY. 
Exiting...") + exit(1) + return LiteLLMBackend( + provider=PROVIDER, + model_name=MODEL_TOOLS, + url=URL_TOOLS, + api_key=WATSONX_API_KEY, + api_version=API_VERSION_TOOLS, + seed=SEED_TOOLS, + top_p=TOP_P_TOOLS, + temperature=TEMPERATURE_TOOLS, + reasoning_effort=REASONING_EFFORT_TOOLS, + max_tokens=MAX_TOKENS_TOOLS, + thinking_tools=THINKING_TOOLS, + thinking_budget_tools=THINKING_BUDGET_TOOLS, + ) + elif PROVIDER == "openai": + return LiteLLMBackend( + provider=PROVIDER, + model_name=MODEL_TOOLS, + url=URL_TOOLS, + api_key=API_KEY_TOOLS, + api_version=API_VERSION_TOOLS, + seed=SEED_TOOLS, + top_p=TOP_P_TOOLS, + temperature=TEMPERATURE_TOOLS, + reasoning_effort=REASONING_EFFORT_TOOLS, + max_tokens=MAX_TOKENS_TOOLS, + thinking_tools=THINKING_TOOLS, + thinking_budget_tools=THINKING_BUDGET_TOOLS, + ) + elif PROVIDER == "litellm": + return LiteLLMBackend( + provider=PROVIDER, + model_name=MODEL_TOOLS, + url=URL_TOOLS, # not used + api_key=API_KEY_TOOLS, + api_version=API_VERSION_TOOLS, + seed=SEED_TOOLS, + top_p=TOP_P_TOOLS, + temperature=TEMPERATURE_TOOLS, + reasoning_effort=REASONING_EFFORT_TOOLS, + thinking_tools=THINKING_TOOLS, + thinking_budget_tools=THINKING_BUDGET_TOOLS, + max_tokens=MAX_TOKENS_TOOLS, + ) + elif PROVIDER == "compatible": + return LiteLLMBackend( + provider=PROVIDER, + model_name=MODEL_TOOLS, + url=URL_TOOLS, + api_key=API_KEY_TOOLS, + api_version=API_VERSION_TOOLS, + seed=SEED_TOOLS, + top_p=TOP_P_TOOLS, + temperature=TEMPERATURE_TOOLS, + reasoning_effort=REASONING_EFFORT_TOOLS, + thinking_tools=THINKING_TOOLS, + thinking_budget_tools=THINKING_BUDGET_TOOLS, + max_tokens=MAX_TOKENS_TOOLS, + ) diff --git a/clients/stratus/llm_backend/trim_util.py b/clients/stratus/llm_backend/trim_util.py new file mode 100644 index 0000000..1e326b5 --- /dev/null +++ b/clients/stratus/llm_backend/trim_util.py @@ -0,0 +1,40 @@ +"""Trim utility for langchain message types.""" + +import copy +from langchain_core.messages import HumanMessage + + +def trim_messages_conservative( + messages, + kept_threshold: int = 30 +): + """ + Trim messages by keeping the last kept_threshold messages unchanged, + and replacing HumanMessage content with "..." for earlier messages. + + Args: + messages: List of langchain messages to trim + kept_threshold: Number of messages to keep unchanged from the end (default: 30) + + Returns: + Deep copy of trimmed messages without modifying the original + """ + # Create a deep copy to avoid modifying the original + trimmed_messages = copy.deepcopy(messages) + + trim_sum = 0 + # If we have fewer messages than the threshold, return all unchanged + if len(trimmed_messages) <= kept_threshold: + return trimmed_messages, 0 + + # Calculate how many messages to process from the beginning + messages_to_trim = len(trimmed_messages) - kept_threshold + + # Process the first messages_to_trim messages + for i in range(messages_to_trim): + message = trimmed_messages[i] + # Only replace content for HumanMessage, keep others unchanged + if isinstance(message, HumanMessage): + message.content = "..." + trim_sum += 1 + return trimmed_messages, trim_sum diff --git a/clients/stratus/stratus_agent/README.md b/clients/stratus/stratus_agent/README.md new file mode 100644 index 0000000..9987733 --- /dev/null +++ b/clients/stratus/stratus_agent/README.md @@ -0,0 +1,7 @@ +# Agent Design and Implementation +Though fancy on text, all four stratus agents share mostly the same agent implementation. 
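For orientation, a rough mental model of the per-round loop these agents share is sketched below; the helper names are made up for the sketch, and the real control flow is the LangGraph state machine in `base_agent.py`.

```python
# Purely illustrative pseudo-structure of one agent round (thinking step, then action step).
# The callables stand in for LLM and tool invocations; they are not the repository's API.
from dataclasses import dataclass
from typing import Callable


@dataclass
class AgentRound:
    think: Callable[[str], str]      # LLM call without tools bound: pick a tool and justify it
    act: Callable[[str], dict]       # LLM call with tools bound: emit the concrete tool call
    run_tool: Callable[[dict], str]  # execute the chosen tool and return its output

    def step(self, tool_descriptions: str) -> str:
        choice = self.think("Choose a tool from this list and justify it:\n" + tool_descriptions)
        tool_call = self.act("Now generate a tool call for your chosen tool: " + choice)
        return self.run_tool(tool_call)
```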
They are implemented as custom REACT agents, which all inherit from the `BaseAgent` class in [`base_agent.py`](https://github.com/SREGym/SREGym/blob/main/clients/stratus/stratus_agent/base_agent.py). +Different agents, such as `DiagnosisAgent` and `MitigationAgent`, are distinguished by their prompts and tool usage. + +In every round, the agent gets a "thinking step," where it chooses a tool and justify its usage. It then gets an "action step," where it generates the correct tool call for the chosen tool. + +After a step limit, the agent is forced to submit a result to the benchmark. diff --git a/clients/stratus/stratus_agent/__init__.py b/clients/stratus/stratus_agent/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/clients/stratus/stratus_agent/base_agent.py b/clients/stratus/stratus_agent/base_agent.py new file mode 100644 index 0000000..e7fac76 --- /dev/null +++ b/clients/stratus/stratus_agent/base_agent.py @@ -0,0 +1,244 @@ +import logging + +from langchain_core.callbacks import UsageMetadataCallbackHandler +from langchain_core.messages import HumanMessage, SystemMessage +from langgraph.graph import StateGraph +from langgraph.graph.state import CompiledStateGraph + +from clients.stratus.stratus_agent.state import State +from clients.stratus.tools.stratus_tool_node import StratusToolNode + +logger = logging.getLogger("all.stratus.base") +logger.propagate = True +logger.setLevel(logging.DEBUG) + + +class BaseAgent: + def __init__(self, llm, max_step, sync_tools, async_tools, submit_tool, tool_descs): + self.graph_builder = StateGraph(State) + self.graph: CompiledStateGraph | None = None + self.max_step = max_step + self.async_tools = async_tools + self.sync_tools = sync_tools + self.llm = llm + self.tool_descs = tool_descs + self.submit_tool = submit_tool + self.force_submit_prompt_inject_node = "force_submit_thinking_step" + self.force_submit_tool_call_node = "force_submit_tool_call" + self.llm_force_submit_tool_call_node = StratusToolNode(sync_tools=[], async_tools=[submit_tool]) + self.thinking_prompt_inject_node = "pre_thinking_step" + self.thinking_node = "thinking_step" + self.tool_calling_prompt_inject_node = "pre_tool_calling_step" + self.tool_calling_node = "tool_calling_step" + self.process_tool_call_node = "process_tool_call" + self.post_round_process_node = "post_round_process" + self.callback = UsageMetadataCallbackHandler() + self.arena_logger = logging.getLogger("sregym-global") + self.loop_count = 0 + + def llm_inference_step(self, messages, tools): + return self.llm.inference(messages=messages, tools=tools) + + def llm_thinking_prompt_inject_step(self, state: State): + human_prompt = HumanMessage( + content="You are now in the thinking stage. Here are all the tools you can use:\n" + + self.tool_descs + + "Choose a tool from the list and output the tool name. Justify your tool choice. In the next step, you will generate a tool call for this tool" + ) + self.arena_logger.info(f"[PROMPT] Framework prompt: \n {human_prompt.content}") + if self.loop_count is not None and self.loop_count == 0: + self.local_logger.debug(f"[Loop {self.loop_count}] Inject framework prompt: \n {human_prompt.content}") + else: + self.local_logger.debug(f"[Loop {self.loop_count}] Inject framework prompt \" {human_prompt.content[:20]}... 
\" again, as above.") + return { + "messages": [human_prompt], + } + + def llm_thinking_step(self, state: State): + # planning step, not providing tool + ai_message = self.llm_inference_step(state["messages"], tools=None) + self.arena_logger.info(f"[LLM] \n {ai_message.content}") + self.local_logger.debug(f"[Loop {self.loop_count}] Ask, and LLM responds: \n {ai_message.content}", extra={"Full Prompt": state["messages"]}) + if ai_message.content == "Server side error": + return { + "messages": [], + } + return { + "messages": [ai_message], + } + + def llm_tool_call_prompt_inject_step(self, state: State): + human_prompt = HumanMessage(content="Now generate a tool call according to your last chosen tool.") + self.arena_logger.info(f"[PROMPT] \n {human_prompt.content}") + if self.loop_count is not None and self.loop_count == 0: + self.local_logger.debug(f"[Loop {self.loop_count}] Inject tool call prompt: \n {human_prompt.content}") + else: + self.local_logger.debug(f"[Loop {self.loop_count}] Inject tool call prompt \" {human_prompt.content[:20]}... \" again, as above.") + return { + "messages": [human_prompt], + } + + def llm_tool_call_step(self, state: State): + if self.sync_tools is None: + if self.async_tools is not None: + ai_message = self.llm_inference_step(state["messages"], tools=self.async_tools) + else: + raise ValueError("the agent must have at least 1 tool!") + else: + if self.async_tools is None: + ai_message = self.llm_inference_step(state["messages"], tools=self.sync_tools) + else: + ai_message = self.llm_inference_step(state["messages"], tools=[*self.sync_tools, *self.async_tools]) + + self.local_logger.debug(f"[Loop {self.loop_count}] Tool call", extra={"Full Prompt": state["messages"]}) + if ai_message.content == "Server side error": + return { + "messages": [], + } + return { + "messages": [ai_message], + } + + def should_submit_router(self, state: State): + should_submit = state["num_steps"] == self.max_step and state["submitted"] == False + self.local_logger.info(f"Should we force the agent submit? {"Yes!" if should_submit else "No!"}") + return self.force_submit_prompt_inject_node if should_submit else self.post_round_process_node + + def post_round_process(self, state: State): + self.local_logger.debug("agent finished a round") + self.local_logger.debug("currently only incrementing step") + self.local_logger.info(f"{'^' * 20} [Loop {self.loop_count}] {'^' * 20}") + self.arena_logger.info("[SPLIT]") + return { + "num_steps": state["num_steps"] + 1, + } + + def llm_force_submit_thinking_step(self, state: State): + human_prompt = HumanMessage( + content="You have reached your step limit, please submit your results by generating a `submit` tool's tool call." 
+ ) + self.arena_logger.info("[WARNING] Agent has not solved the problem until the step limit, force submission.") + self.arena_logger.info(f"[PROMPT] \n {human_prompt.content}") + # self.local_logger.info(f"[Loop {self.loop_count}] Inject force submit prompt: \n {human_prompt.content}") + return {"messages": [human_prompt]} + + def llm_force_submit_tool_call_step(self, state: State): + result = self.llm_inference_step(state["messages"], tools=[self.submit_tool]) + self.arena_logger.info(f"[LLM] \n {result.content}") + # self.local_logger.info(f"[Loop {self.loop_count}] Force submit, and LLM responds: \n {result.content}") + return {"messages": result} + + def save_agent_graph_to_png(self): + try: # in case the service times out + with open("./agent_graph.png", "wb") as png: + png.write(self.graph.get_graph().draw_mermaid_png()) + except Exception as e: + logger.error(f"Error saving agent graph to PNG: {e}") + + def clear_memory(self): + if not hasattr(self, "memory_saver"): + raise RuntimeError("Should not be called on uninitialized agent. Did you call build_agent()?") + # source: https://github.com/langchain-ai/langchain/discussions/19744#discussioncomment-13734390 + thread_id = "1" + try: + if hasattr(self.memory_saver, "storage") and hasattr(self.memory_saver, "writes"): + self.memory_saver.storage.pop(thread_id, None) + + keys_to_remove = [key for key in self.memory_saver.writes.keys() if key[0] == thread_id] + for key in keys_to_remove: + self.memory_saver.writes.pop(key, None) + + print(f"Memory cleared for thread_id: {thread_id}") + return + except Exception as e: + logger.error(f"Error clearing InMemorySaver storage for thread_id {thread_id}: {e}") + + def run(self, starting_prompts): + """Running an agent + + Args: + starting_prompts (list[SystemMessage | HumanMessage]): The data inside the dict will be filled into the prompts. + + Returns: + final state of the agent running, including messages and other state values. + """ + if not self.graph: + raise ValueError("Agent graph is None. Have you built the agent?") + + if len(starting_prompts) == 0: + raise ValueError("No prompts used to start the conversation!") + + state = { + "messages": starting_prompts, + "num_steps": 0, + "submitted": False, + "rollback_stack": "", + } + + return list( + self.graph.stream( + state, + # recursion_limit could be as large as possible as we have our own limit. + config={"recursion_limit": 10000, "configurable": {"thread_id": "1"}, "callbacks": [self.callback]}, + stream_mode="values", + ) + )[-1] + + async def arun(self, starting_prompts): + """ + Async running an agent + + Args: + starting_prompts (dict): The data inside the dict will be filled into the prompts. + + Returns: + final state of the agent running, including messages and other state values. + """ + if not self.graph: + raise ValueError("Agent graph is None. 
Have you built the agent?") + + if len(starting_prompts) == 0: + raise ValueError("No prompts used to start the conversation!") + + graph_events = [] + while True: + graph_config = {"configurable": {"thread_id": "1"}} + logger.info(f"{'-' * 20} [Loop {self.loop_count}] {'-' * 20}") + last_state = self.graph.get_state(config=graph_config) + if len(last_state.values) != 0: + logger.debug(f"[Loop {self.loop_count}] There were last {len(last_state.values)} states.") + # this is all the previous msgs the agent had, we just inherit them in the next graph traversal + state = last_state.values + else: + logger.debug(f"[Loop {self.loop_count}] There were no states.") + # fresh agent start, init state here + state = { + "messages": starting_prompts, + # "workdir": "", + # "curr_file": "", + # "curr_line": 0, + "num_steps": 0, + # "rec_submission_rounds": 0, + # "submit_tried": False, + "submitted": False, + # "ans": dict(), + "rollback_stack": "", + } + + async for event in self.graph.astream( + state, + # recursion_limit could be as large as possible as we have our own limit. + config={"recursion_limit": 10000, "configurable": {"thread_id": "1"}, "callbacks": [self.callback]}, + stream_mode="values", + ): + if (not graph_events) or event["messages"] != graph_events[-1]["messages"]: + event["messages"][-1].pretty_print() + graph_events.append(event) + last_state = self.graph.get_state(config=graph_config) + if last_state.values["submitted"]: + logger.info(f"[Loop {self.loop_count}] Agent submitted, breaking loop from base_agent") + break + + self.loop_count += 1 + + return last_state diff --git a/clients/stratus/stratus_agent/diagnosis_agent.py b/clients/stratus/stratus_agent/diagnosis_agent.py new file mode 100644 index 0000000..308c233 --- /dev/null +++ b/clients/stratus/stratus_agent/diagnosis_agent.py @@ -0,0 +1,198 @@ +import asyncio +from pathlib import Path + +import yaml +from langchain_core.callbacks import UsageMetadataCallbackHandler +from langgraph.checkpoint.memory import MemorySaver +from langgraph.constants import END, START + +from clients.stratus.llm_backend.init_backend import get_llm_backend_for_tools +from clients.stratus.stratus_agent.base_agent import BaseAgent +from clients.stratus.stratus_agent.mitigation_agent import generate_run_summary +from clients.stratus.stratus_utils.get_logger import get_logger +from clients.stratus.stratus_utils.get_starting_prompt import get_starting_prompts +from clients.stratus.stratus_utils.str_to_tool import str_to_tool +from clients.stratus.tools.stratus_tool_node import StratusToolNode + +import logging +logger = logging.getLogger("all.stratus.diagnosis") +logger.propagate = True +logger.setLevel(logging.DEBUG) + + +class DiagnosisAgent(BaseAgent): + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.tool_node = None + self.max_step = kwargs.get("max_step", 20) + self.loop_count = 0 + self.local_logger = logging.getLogger("all.stratus.diagnosis") + + def build_agent(self): + self.tool_node = StratusToolNode(async_tools=self.async_tools, sync_tools=self.sync_tools) + + self.graph_builder.add_node(self.thinking_prompt_inject_node, self.llm_thinking_prompt_inject_step) + self.graph_builder.add_node(self.tool_calling_prompt_inject_node, self.llm_tool_call_prompt_inject_step) + self.graph_builder.add_node(self.thinking_node, self.llm_thinking_step) + self.graph_builder.add_node(self.tool_calling_node, self.llm_tool_call_step) + self.graph_builder.add_node(self.process_tool_call_node, self.tool_node) + 
self.graph_builder.add_node(self.post_round_process_node, self.post_round_process) + self.graph_builder.add_node(self.force_submit_prompt_inject_node, self.llm_force_submit_thinking_step) + self.graph_builder.add_node(self.force_submit_tool_call_node, self.llm_force_submit_tool_call_step) + + self.graph_builder.add_edge(START, self.thinking_prompt_inject_node) + self.graph_builder.add_edge(self.thinking_prompt_inject_node, self.thinking_node) + self.graph_builder.add_edge(self.thinking_node, self.tool_calling_prompt_inject_node) + self.graph_builder.add_edge(self.tool_calling_prompt_inject_node, self.tool_calling_node) + self.graph_builder.add_edge(self.tool_calling_node, self.process_tool_call_node) + self.graph_builder.add_edge(self.process_tool_call_node, self.post_round_process_node) + self.graph_builder.add_conditional_edges( + self.process_tool_call_node, + self.should_submit_router, + { + self.force_submit_prompt_inject_node: self.force_submit_prompt_inject_node, + self.post_round_process_node: self.post_round_process_node, + }, + ) + self.graph_builder.add_edge(self.force_submit_prompt_inject_node, self.force_submit_tool_call_node) + self.graph_builder.add_edge(self.force_submit_tool_call_node, END) + self.graph_builder.add_edge(self.post_round_process_node, END) + + self.memory_saver = MemorySaver() + self.graph = self.graph_builder.compile(checkpointer=self.memory_saver) + + async def arun(self, starting_prompts): + """ + Async running an agent + + Args: + starting_prompts (dict): The data inside the dict will be filled into the prompts. + + Returns: + final state of the agent running, including messages and other state values. + """ + if not self.graph: + raise ValueError("Agent graph is None. Have you built the agent?") + + if len(starting_prompts) == 0: + raise ValueError("No prompts used to start the conversation!") + + all_init_prompts = "" + for prompt in starting_prompts: + all_init_prompts += prompt.content + "\n" + self.arena_logger.info(f"[PROMPT] \n {all_init_prompts}") + + graph_events = [] + + while True: + graph_config = {"configurable": {"thread_id": "1"}} + + logger.info(f"{'-' * 20} [Loop {self.loop_count}] {'-' * 20}") + last_state = self.graph.get_state(config=graph_config) + # logger.info("last state: %s", last_state) + if len(last_state.values) != 0: + logger.debug(f"[Loop {self.loop_count}] There were last {len(last_state.values)} states.") + # this is all the previous msgs the agent had, we just inherit them in the next graph traversal + state = last_state.values + else: + logger.debug(f"[Loop {self.loop_count}] There were no states.") + # fresh agent start, init state here + state = { + "messages": starting_prompts, + # "workdir": "", + # "curr_file": "", + # "curr_line": 0, + "num_steps": 0, + # "rec_submission_rounds": 0, + # "submit_tried": False, + "submitted": False, + # "ans": dict(), + "rollback_stack": "", + } + + + async for event in self.graph.astream( + state, + # recursion_limit could be as large as possible as we have our own limit. 
+ config={"recursion_limit": 10000, "configurable": {"thread_id": "1"}, "callbacks": [self.callback]}, + stream_mode="values", + ): + if (not graph_events) or event["messages"][-1] != graph_events[-1]["messages"][-1]: + #print(f"Last message: {graph_events[-1]['messages']}") + event["messages"][-1].pretty_print() + graph_events.append(event) + last_state = self.graph.get_state(config=graph_config) + if last_state.values["submitted"]: + logger.info(f"[Loop {self.loop_count}] Agent submitted, breaking loop.") + break + + self.loop_count += 1 + + # print(f"================{last_state.values['num_steps']}===============") + + return last_state + + +def build_default_diagnosis_agent(): + file_parent_dir = Path(__file__).resolve().parent + diagnosis_agent_config_path = file_parent_dir.parent / "configs" / "diagnosis_agent_config.yaml" + diagnosis_agent_config = yaml.safe_load(open(diagnosis_agent_config_path, "r")) + max_step = diagnosis_agent_config["max_step"] + prompt_path = file_parent_dir.parent / "configs" / diagnosis_agent_config["prompts_path"] + sync_tools = [] + async_tools = [] + tool_descriptions = "" + if diagnosis_agent_config["sync_tools"] is not None: + for sync_tool_struct in diagnosis_agent_config["sync_tools"]: + sync_tools.append(str_to_tool(sync_tool_struct)) + tool_descriptions += ( + f"tool name: {sync_tool_struct["name"]}" + + "\n\n" + + f"tool descriptions {sync_tool_struct["description"]}" + + "\n\n" + ) + else: + sync_tools = None + if diagnosis_agent_config["async_tools"] is not None: + for async_tool_struct in diagnosis_agent_config["async_tools"]: + async_tools.append(str_to_tool(async_tool_struct)) + tool_descriptions += ( + f"tool name: {async_tool_struct["name"]}" + + "\n\n" + + f"tool description: {async_tool_struct["description"]}" + + "\n\n" + ) + else: + async_tools = None + + submit_tool = str_to_tool( + { + "name": "submit_tool", + "description": """ + The tool to submit benchmark results + + Args: + ans (str): the answer you would like to submit to the benchmark + """, + } + ) + + agent = DiagnosisAgent( + llm=get_llm_backend_for_tools(), + max_step=max_step, + sync_tools=sync_tools, + async_tools=async_tools, + submit_tool=submit_tool, + tool_descs=tool_descriptions, + ) + agent.build_agent() + agent.save_agent_graph_to_png() + return agent, prompt_path, max_step + + +async def single_run_with_predefined_prompts(init_prompts): + agent, prompt_path, max_step = build_default_diagnosis_agent() + res = await agent.arun(init_prompts) + logger.info("Clearing agent's memory") + agent.clear_memory() + return agent, res diff --git a/clients/stratus/stratus_agent/driver/__init__.py b/clients/stratus/stratus_agent/driver/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/clients/stratus/stratus_agent/driver/driver.py b/clients/stratus/stratus_agent/driver/driver.py new file mode 100644 index 0000000..822ecc0 --- /dev/null +++ b/clients/stratus/stratus_agent/driver/driver.py @@ -0,0 +1,598 @@ +import asyncio +import time + +# for parsing return values from benchmark app info as python dict +from ast import literal_eval +from datetime import datetime +from pathlib import Path +from typing import List + +import pandas as pd +import requests +import yaml +from langchain_core.messages import HumanMessage, SystemMessage + +from logger import init_logger + +init_logger() + +import logging + +from clients.stratus.configs.langgraph_tool_configs import LanggraphToolConfig +from clients.stratus.stratus_agent.diagnosis_agent import 
single_run_with_predefined_prompts as diagnosis_single_run +from clients.stratus.stratus_agent.localization_agent import ( + single_run_with_predefined_prompts as localization_single_run, +) +from clients.stratus.stratus_agent.mitigation_agent import ( + generate_run_summary, +) +from clients.stratus.stratus_agent.mitigation_agent import retry_run_with_feedback as mitigation_agent_retry_run +from clients.stratus.stratus_agent.mitigation_agent import ( + single_run_with_predefined_prompts as mitigation_agent_single_run, +) +from clients.stratus.stratus_agent.rollback_agent import main as rollback_agent_main +from clients.stratus.stratus_utils.get_logger import get_logger +from clients.stratus.tools.submit_tool import manual_submit_tool +from clients.stratus.weak_oracles.base_oracle import BaseOracle, OracleResult +from clients.stratus.weak_oracles.cluster_state_oracle import ClusterStateOracle +from clients.stratus.weak_oracles.workload_oracle import WorkloadOracle + +logger = logging.getLogger("all.stratus.driver") +logger.propagate = True +logger.setLevel(logging.DEBUG) + + +def get_current_datetime_formatted(): + now = datetime.now() + formatted_datetime = now.strftime("%m-%d_%H-%M") + return formatted_datetime + + +def get_current_datetime_formatted(): + now = datetime.now() + formatted_datetime = now.strftime("%m-%d_%H-%M") + return formatted_datetime + + +async def validate_oracles(oracles: List[BaseOracle]) -> List[bool | List[OracleResult]]: + results = [] + attempt_failed = False + for oracle in oracles: + logger.info(f"validating oracle: {oracle}") + res: OracleResult = await oracle.validate() + if not res.success: + attempt_failed = True + results.append(res) + if attempt_failed: + return [False, results] + return [True, results] + + +def get_app_info(): + ltc = LanggraphToolConfig() + url = ltc.benchmark_app_info_url + try: + response = requests.get(url) + logger.debug(f"Agent gets response: status: {response.status_code}, text: {response.text}") + app_info_str = str(response.text) + logger.debug(f"App info as str: {app_info_str} ") + app_info = literal_eval(app_info_str) + logger.debug(f"App info: {app_info}") + return app_info + except Exception as e: + logger.error(f"[get_app_info] HTTP submission failed: {e}") + return "error" + + +def get_curr_problem(): + ltc = LanggraphToolConfig() + url = ltc.benchmark_current_problem + try: + response = requests.get(url) + logger.info(f"Response status: {response.status_code}, text: {response.text}") + problem_str = str(response.text) + logger.info(f"problem as str: {problem_str}") + problem = literal_eval(problem_str) + logger.info(f"problem info: {problem}") + return problem["problem_id"] + except Exception as e: + logger.error(f"[get_curr_problem] HTTP submission failed: {e}") + return "error" + + +def get_app_class_by_name(app_name): + target_app = "" + if app_name == "Social Network": + from sregym.service.apps.social_network import SocialNetwork + + target_app = SocialNetwork() + elif app_name == "OpenTelemetry Demo Astronomy Shop": + from sregym.service.apps.astronomy_shop import AstronomyShop + + target_app = AstronomyShop() + elif app_name == "Flight Ticket": + from sregym.service.apps.flight_ticket import FlightTicket + + logger.info(f"Flight ticket has never been tested!!") + target_app = FlightTicket() + elif app_name == "Hotel Reservation": + from sregym.service.apps.hotel_reservation import HotelReservation + + target_app = HotelReservation() + elif app_name == "TiDB Cluster with Operator": + from 
sregym.service.apps.fleet_cast import FleetCast + + logger.info(f"TiDB has never been tested!!") + target_app = FleetCast() + elif app_name == "Train Ticket": + from sregym.service.apps.train_ticket import TrainTicket + + target_app = TrainTicket() + return target_app + + +async def diagnosis_task_main(): + logger.info("loading configs") + file_parent_dir = Path(__file__).resolve().parent.parent + diagnosis_agent_config_path = file_parent_dir.parent / "configs" / "diagnosis_agent_config.yaml" + diagnosis_agent_config = yaml.safe_load(open(diagnosis_agent_config_path, "r")) + diagnosis_agent_max_step = diagnosis_agent_config["max_step"] + diagnosis_agent_prompt_path = file_parent_dir.parent / "configs" / diagnosis_agent_config["prompts_path"] + diagnosis_agent_prompts = yaml.safe_load(open(diagnosis_agent_prompt_path, "r")) + app_info = get_app_info() + app_name = app_info["app_name"] + app_description = app_info["descriptions"] + app_namespace = app_info["namespace"] + first_run_initial_messages = [ + SystemMessage(diagnosis_agent_prompts["system"]), + HumanMessage( + diagnosis_agent_prompts["user"].format( + max_step=diagnosis_agent_max_step, + app_name=app_name, + app_description=app_description, + app_namespace=app_namespace, + ) + ), + ] + start_time = time.perf_counter() + agent, last_state = await diagnosis_single_run(first_run_initial_messages) + agent_time = time.perf_counter() - start_time + agent_exec_stats = dict() + # assuming we only use one model + usage_metadata = next(iter(agent.callback.usage_metadata.items()))[1] + logger.info(f"agent usage metadata: {usage_metadata}") + agent_exec_stats["input_tokens"] = usage_metadata["input_tokens"] + agent_exec_stats["output_tokens"] = usage_metadata["output_tokens"] + agent_exec_stats["total_tokens"] = usage_metadata["total_tokens"] + # assuming time in seconds. 
+ agent_exec_stats["time"] = str(agent_time) + agent_exec_stats["steps"] = last_state.values["num_steps"] + agent_exec_stats["num_retry_attempts"] = "N/A" + agent_exec_stats["rollback_stack"] = "N/A" + agent_exec_stats["oracle_results"] = "N/A" + # agent_exec_stats["last_state"] = last_state + logger.info(f"Finished diagnosis agent run, output dict: {agent_exec_stats}") + return agent_exec_stats + + +async def localization_task_main(): + logger.info("loading configs") + file_parent_dir = Path(__file__).resolve().parent.parent + localization_agent_config_path = file_parent_dir.parent / "configs" / "localization_agent_config.yaml" + localization_agent_config = yaml.safe_load(open(localization_agent_config_path, "r")) + localization_agent_max_step = localization_agent_config["max_step"] + localization_agent_prompt_path = file_parent_dir.parent / "configs" / localization_agent_config["prompts_path"] + localization_agent_prompts = yaml.safe_load(open(localization_agent_prompt_path, "r")) + app_info = get_app_info() + app_name = app_info["app_name"] + app_description = app_info["descriptions"] + app_namespace = app_info["namespace"] + first_run_initial_messages = [ + SystemMessage(localization_agent_prompts["system"]), + HumanMessage( + localization_agent_prompts["user"].format( + max_step=localization_agent_max_step, + app_name=app_name, + app_description=app_description, + app_namespace=app_namespace, + ) + ), + ] + start_time = time.perf_counter() + agent, last_state = await localization_single_run(first_run_initial_messages) + agent_time = time.perf_counter() - start_time + agent_exec_stats = dict() + usage_metadata = next(iter(agent.callback.usage_metadata.items()))[1] + agent_exec_stats["input_tokens"] = usage_metadata["input_tokens"] + agent_exec_stats["output_tokens"] = usage_metadata["output_tokens"] + agent_exec_stats["total_tokens"] = usage_metadata["total_tokens"] + # assuming time in seconds. + agent_exec_stats["time"] = str(agent_time) + agent_exec_stats["steps"] = last_state.values["num_steps"] + agent_exec_stats["num_retry_attempts"] = "N/A" + agent_exec_stats["rollback_stack"] = "N/A" + agent_exec_stats["oracle_results"] = "N/A" + # agent_exec_stats["last_state"] = last_state + logger.info(f"Finished localization agent run, output dict: {agent_exec_stats}") + return agent_exec_stats, last_state + + +async def mitigation_task_main(localization_summary): + # run rollback, reflect, and retry for mitigation and rollback agent + # note: not implementing a `mitigation_task_main()` like other agents above for rollback, reflect, and retry is due to these considerations + # 1. keep each agent's main() method only about running that specific agent's loop until agent's submission + # 2. mitigation agent is special as when we refer to "mitigation" as a task for the Stratus agent, we refer to the + # rollback, reflect, retry pipeline, which uses rollback agent too. Implementing logic about rollback agent + # inside mitigation agent's method seems against good SE practice. 
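The comment above describes the rollback, reflect, and retry pipeline in prose; a compressed, hypothetical sketch of that control flow follows. The callables stand in for the driver's real helpers, and the actual `none`/`naive`/`validate` branches below carry the full bookkeeping.

```python
# Hypothetical condensation of the retry orchestration described above; not the driver's real API.
async def mitigate_with_retries(run_mitigation_once, run_rollback, validate, max_attempts: int) -> bool:
    for _attempt in range(max_attempts):
        await run_mitigation_once()          # one mitigation agent run
        healthy, _issues = await validate()  # weak oracles inspect the resulting cluster state
        if healthy:
            return True                      # application looks healthy, stop retrying
        await run_rollback()                 # undo this attempt before reflecting and retrying
    return False
```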
+ + # getting some configs + logger.info("loading configs") + file_parent_dir = Path(__file__).resolve().parent.parent + mitigation_agent_config_path = file_parent_dir.parent / "configs" / "mitigation_agent_config.yaml" + mitigation_agent_config = yaml.safe_load(open(mitigation_agent_config_path, "r")) + mitigation_agent_max_step = mitigation_agent_config["max_step"] + mitigation_agent_prompt_path = file_parent_dir.parent / "configs" / mitigation_agent_config["prompts_path"] + mitigation_agent_max_retry_attempts = mitigation_agent_config["max_retry_attempts"] + mitigation_agent_retry_mode = mitigation_agent_config["retry_mode"] + + rollback_agent_config_path = file_parent_dir.parent / "configs" / "rollback_agent_config.yaml" + rollback_agent_config = yaml.safe_load(open(rollback_agent_config_path, "r")) + rollback_agent_max_step = rollback_agent_config["max_step"] + rollback_agent_prompt_path = file_parent_dir.parent / "configs" / rollback_agent_config["prompts_path"] + + llm_summarization_prompt_file = file_parent_dir.parent / "configs" / "llm_summarization_prompt.yaml" + llm_summarization_prompt = yaml.safe_load(open(llm_summarization_prompt_file, "r"))["mitigation_retry_prompt"] + mitigation_agent_prompts = yaml.safe_load(open(mitigation_agent_prompt_path, "r")) + + # oracle + logger.info("setting up oracles") + cluster_state_oracle = ClusterStateOracle() + oracles = [cluster_state_oracle] + + # setting up workload oracle, need to interact with benchmark. + logger.info("getting app info") + app_info = get_app_info() + app_name = app_info["app_name"] + app_description = app_info["descriptions"] + app_namespace = app_info["namespace"] + target_app = get_app_class_by_name(app_name) + if app_name not in ["Social Network", "Hotel Reservation"]: + logger.info("Current app does not support workload oracle") + else: + logger.info(f"adding oracle for app [{app_name}]") + workload_oracle = WorkloadOracle(target_app) + oracles.append(workload_oracle) + + # defining the first set of messages that all retry mode share + first_run_initial_messages = [ + SystemMessage(mitigation_agent_prompts["system"]), + HumanMessage( + mitigation_agent_prompts["user"].format( + max_step=mitigation_agent_max_step, + faults_info=localization_summary, + app_name=app_name, + app_description=app_description, + app_namespace=app_namespace, + ) + ), + ] + start_time = time.perf_counter() + logger.info(f"running in retry mode: [{mitigation_agent_retry_mode}]") + # mitigation task in plain English: + if mitigation_agent_retry_mode == "none": + # if the retry mode is none, just run mitigation agent once. + agent, last_state = await mitigation_agent_single_run(first_run_initial_messages) + agent_time = time.perf_counter() - start_time + agent_exec_stats = dict() + agent_exec_stats["agent_name"] = "mitigation_agent_none" + usage_metadata = next(iter(agent.callback.usage_metadata.items()))[1] + agent_exec_stats["input_tokens"] = usage_metadata["input_tokens"] + agent_exec_stats["output_tokens"] = usage_metadata["output_tokens"] + agent_exec_stats["total_tokens"] = usage_metadata["total_tokens"] + # assuming time in seconds. 
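+ # NOTE: the bookkeeping in this branch mirrors diagnosis_task_main()/localization_task_main(): token counts, wall-clock time, and step count for a single run, with the retry/rollback/oracle fields set to "N/A".
+ # The completion log message a few lines below still reads "Finished localization agent run" even though this is the mitigation agent's no-retry path; it appears to be a copy-paste leftover.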
+ agent_exec_stats["time"] = str(agent_time) + agent_exec_stats["steps"] = last_state.values["num_steps"] + agent_exec_stats["num_retry_attempts"] = "N/A" + agent_exec_stats["rollback_stack"] = "N/A" + agent_exec_stats["oracle_results"] = "N/A" + # agent_exec_stats["last_state"] = last_state + logger.info(f"Finished localization agent run, output dict: {agent_exec_stats}") + return agent_exec_stats + + elif mitigation_agent_retry_mode == "naive": + # if the retry mode is naive, run mitigation agent with retry but no rollback agent. + curr_attempt = 0 + last_state = "" + oracle_results = OracleResult( + success=False, issues=["This is the beginning of mitigation, please observe the cluster for issues."] + ) + agent_exec_stats = dict() + agent_names_lst = [] + input_tokens_lst = [] + output_tokens_lst = [] + total_tokens_lst = [] + time_lst = [] + steps_lst = [] + num_retry_attempts_lst = [] + rollback_stack_lst = [] + oracle_results_lst = [] + while curr_attempt < mitigation_agent_max_retry_attempts: + logger.info(f"current attempt: {curr_attempt + 1}/{mitigation_agent_max_retry_attempts}") + agent, last_state = await mitigation_agent_single_run(first_run_initial_messages) + + # recording post-run data + agent_time = time.perf_counter() - start_time + agent_names_lst.append("mitigation_agent_naive") + usage_metadata = next(iter(agent.callback.usage_metadata.items()))[1] + input_tokens_lst.append(usage_metadata["input_tokens"]) + output_tokens_lst.append(usage_metadata["output_tokens"]) + total_tokens_lst.append(usage_metadata["total_tokens"]) + time_lst.append(str(agent_time)) + steps_lst.append(last_state.values["num_steps"]) + num_retry_attempts_lst.append(str(curr_attempt)) + rollback_stack_lst.append("N/A, naive retry") + + # getting oracle result + oracle_results = await validate_oracles(oracles) + oracle_results_lst.append(str(oracle_results)) + logger.info(f"oracle results: {oracle_results}") + if oracle_results[0] is True: + # agent succeeds, let's finish here. + logger.info("agent succeeds, breaking!") + break + # otherwise, naively retry + logger.info(f"agent failed, retrying... {curr_attempt + 1}/{mitigation_agent_max_retry_attempts}") + curr_attempt += 1 + agent_exec_stats["agent_names"] = agent_names_lst + agent_exec_stats["input_tokens"] = input_tokens_lst + agent_exec_stats["output_tokens"] = output_tokens_lst + agent_exec_stats["time"] = time_lst + agent_exec_stats["total_tokens"] = total_tokens_lst + agent_exec_stats["steps"] = steps_lst + agent_exec_stats["num_retry_attempts"] = num_retry_attempts_lst + agent_exec_stats["rollback_stack"] = rollback_stack_lst + agent_exec_stats["oracle_results"] = oracle_results_lst + return agent_exec_stats + elif mitigation_agent_retry_mode == "validate": + logger.info(f"retry mode: [{mitigation_agent_retry_mode}]") + # if the retry mode is validation, run mitigation agent with rollback and weak oracle. 
+ # each start of new agent trial, the agent should receive the last run's oracle results + # and some reflections as input + curr_attempt = 0 + mitigation_agent_last_state = "" + rollback_agent_last_state = "" + oracle_results = OracleResult( + success=False, issues=["This is the beginning of mitigation, please observe the cluster for issues."] + ) + + agent_exec_stats = dict() + agent_names_lst = [] + input_tokens_lst = [] + output_tokens_lst = [] + total_tokens_lst = [] + time_lst = [] + steps_lst = [] + num_retry_attempts_lst = [] + rollback_stack_lst = [] + oracle_results_lst = [] + + # starting retry loop + while curr_attempt < mitigation_agent_max_retry_attempts: + if curr_attempt == 0: + logger.info(f"running first try") + agent, mitigation_agent_last_state = await mitigation_agent_single_run(first_run_initial_messages) + else: + logger.info( + f"running retries. current attempt: {curr_attempt + 1}/{mitigation_agent_max_retry_attempts}" + ) + # we compose the retry prompts here. + last_run_summary = generate_run_summary(mitigation_agent_last_state, llm_summarization_prompt) + retry_run_initial_messages = [ + SystemMessage(mitigation_agent_prompts["system"]), + HumanMessage( + mitigation_agent_prompts["user"].format( + max_step=mitigation_agent_max_step, + faults_info=localization_summary, + app_name=app_name, + app_description=app_description, + app_namespace=app_namespace, + ) + + "\n\n" + + mitigation_agent_prompts["retry_user"].format( + last_result=str(oracle_results), + reflection=last_run_summary, + ) + ), + ] + logger.info(f"composed retry prompts: {retry_run_initial_messages}") + agent, mitigation_agent_last_state = await mitigation_agent_retry_run(retry_run_initial_messages) + + # recording post-run data + agent_time = time.perf_counter() - start_time + agent_names_lst.append("mitigation_agent_validate") + usage_metadata = next(iter(agent.callback.usage_metadata.items()))[1] + input_tokens_lst.append(usage_metadata["input_tokens"]) + output_tokens_lst.append(usage_metadata["output_tokens"]) + total_tokens_lst.append(usage_metadata["total_tokens"]) + time_lst.append(str(agent_time)) + steps_lst.append(mitigation_agent_last_state.values["num_steps"]) + num_retry_attempts_lst.append(str(curr_attempt)) + rollback_stack_lst.append("N/A, mitigation agent") + + # getting oracle result + oracle_results = await validate_oracles(oracles) + oracle_results_lst.append(str(oracle_results)) + has_succeeded = oracle_results[0] + if has_succeeded: + # agent succeeds, let's finish here. + logger.info("agent succeeds! manually submitting for the agent") + await manual_submit_tool("") + logger.info("breaking the retry loop") + break + # return agent_exec_stats + else: + # here the agent fails, we make decision if we should retry + should_retry = curr_attempt + 1 < mitigation_agent_max_retry_attempts + logger.info(f"agent failed, should we retry? {"Yes!" if should_retry else "No!"}") + if should_retry: + # we should retry as we have more trials left + logger.info( + f"we should retry as we have more attempts left. attempts left: {(mitigation_agent_max_retry_attempts - 1) - (curr_attempt + 1)}" + ) + # rollback all changes + # rollback agent is stateless and "best effort" idempotent, just rollback + # memory is cleared in the retry_run() method, so the agent can start anew. + logger.info(f"agent failed, retrying... 
{curr_attempt + 1}/{mitigation_agent_max_retry_attempts}") + logger.info(f"running rollback agent to reverse progress") + rollback_start_time = time.perf_counter() + rollback_agent, rollback_agent_last_state = await rollback_agent_main() + rollback_end_time = time.perf_counter() - rollback_start_time + agent_names_lst.append("rollback_agent") + usage_metadata = next(iter(rollback_agent.callback.usage_metadata.items()))[1] + input_tokens_lst.append(usage_metadata["input_tokens"]) + output_tokens_lst.append(usage_metadata["output_tokens"]) + total_tokens_lst.append(usage_metadata["total_tokens"]) + time_lst.append(str(rollback_end_time)) + steps_lst.append(rollback_agent_last_state.values["num_steps"]) + num_retry_attempts_lst.append(str(curr_attempt)) + rollback_stack_lst.append(rollback_agent_last_state.values["rollback_stack"]) + oracle_results_lst.append(str("N/A, rollback agent")) + curr_attempt += 1 + else: + logger.info("we shouldn't retry as we don't have more attempts left.") + logger.info(f"making a real submission for the agent.") + await manual_submit_tool("") + break + # return agent_exec_stats + + agent_exec_stats["agent_name"] = agent_names_lst + agent_exec_stats["input_tokens"] = input_tokens_lst + agent_exec_stats["output_tokens"] = output_tokens_lst + agent_exec_stats["total_tokens"] = total_tokens_lst + agent_exec_stats["time"] = time_lst + agent_exec_stats["steps"] = steps_lst + agent_exec_stats["num_retry_attempts"] = num_retry_attempts_lst + agent_exec_stats["rollback_stack"] = rollback_stack_lst + agent_exec_stats["oracle_results"] = oracle_results_lst + return agent_exec_stats + + +async def main(): + # run diagnosis agent 2 times + # here, running the file's main function should suffice. + # 1 for noop diagnosis + current_problem = get_curr_problem() + + # logger.info("*" * 25 + f" Testing {current_problem} ! " + "*" * 25) + # logger.info("*" * 25 + f" Testing {current_problem} ! " + "*" * 25) + # logger.info("*" * 25 + f" Testing {current_problem} ! 
" + "*" * 25) + agent_output_df = pd.DataFrame() + agent_names = [] + agent_in_tokens = [] + agent_out_tokens = [] + agent_total_tokens = [] + agent_times = [] + agent_steps = [] + agent_retry_attempts = [] + agent_rollback_stack = [] + agent_oracle_results = [] + # logger.info("*" * 25 + " Starting [diagnosis agent] for [NOOP detection] " + "*" * 25) + # diagnosis_agent_exec_stats = await diagnosis_task_main() + # agent_names.append("diagnosis_agent_noop") + # agent_in_tokens.append(diagnosis_agent_exec_stats["input_tokens"]) + # agent_out_tokens.append(diagnosis_agent_exec_stats["output_tokens"]) + # agent_total_tokens.append(diagnosis_agent_exec_stats["total_tokens"]) + # agent_times.append(diagnosis_agent_exec_stats["time"]) + # agent_steps.append(diagnosis_agent_exec_stats["steps"]) + # agent_retry_attempts.append(diagnosis_agent_exec_stats["num_retry_attempts"]) + # agent_rollback_stack.append(diagnosis_agent_exec_stats["rollback_stack"]) + # agent_oracle_results.append(diagnosis_agent_exec_stats["oracle_results"]) + # logger.info("*" * 25 + " Finished [diagnosis agent] " + "*" * 25) + # logger.info("sleeping for a minute for fault propagation") + # await asyncio.sleep(60) + + # 1 for faulty diagnosis + # logger.info("*" * 25 + " Starting [diagnosis agent] for [Faulty detection] " + "*" * 25) + # diagnosis_agent_exec_stats = await diagnosis_task_main() + # agent_names.append("diagnosis_agent_faulty") + # agent_in_tokens.append(diagnosis_agent_exec_stats["input_tokens"]) + # agent_out_tokens.append(diagnosis_agent_exec_stats["output_tokens"]) + # agent_total_tokens.append(diagnosis_agent_exec_stats["total_tokens"]) + # agent_times.append(diagnosis_agent_exec_stats["time"]) + # agent_steps.append(diagnosis_agent_exec_stats["steps"]) + # agent_retry_attempts.append(diagnosis_agent_exec_stats["num_retry_attempts"]) + # agent_rollback_stack.append(diagnosis_agent_exec_stats["rollback_stack"]) + # agent_oracle_results.append(diagnosis_agent_exec_stats["oracle_results"]) + # logger.info("*" * 25 + " Finished [diagnosis agent] " + "*" * 25) + + # run localization agent 1 time for localization + # (BTS it's just diagnosis agent with different prompts) + # here, running the file's main function should suffice + logger.info("*" * 25 + " Starting [localization agent] for [localization] " + "*" * 25) + localization_agent_exec_stats, localization_agent_last_state = await localization_task_main() + agent_names.append("localization_agent") + agent_in_tokens.append(localization_agent_exec_stats["input_tokens"]) + agent_out_tokens.append(localization_agent_exec_stats["output_tokens"]) + agent_total_tokens.append(localization_agent_exec_stats["total_tokens"]) + agent_times.append(localization_agent_exec_stats["time"]) + agent_steps.append(localization_agent_exec_stats["steps"]) + agent_retry_attempts.append(localization_agent_exec_stats["num_retry_attempts"]) + agent_rollback_stack.append(localization_agent_exec_stats["rollback_stack"]) + agent_oracle_results.append(localization_agent_exec_stats["oracle_results"]) + logger.info("*" * 25 + " Finished [localization agent] " + "*" * 25) + + file_parent_dir = Path(__file__).resolve().parent.parent + localization_agent_config_path = file_parent_dir.parent / "configs" / "localization_agent_config.yaml" + localization_agent_config = yaml.safe_load(open(localization_agent_config_path, "r")) + localization_agent_prompt_path = file_parent_dir.parent / "configs" / localization_agent_config["prompts_path"] + localization_agent_prompts = 
yaml.safe_load(open(localization_agent_prompt_path, "r")) + localization_fault_summary = generate_run_summary( + localization_agent_last_state, localization_agent_prompts["localization_summary_prompt"] + ) + + # run mitigation task 1 time for mitigation + # it includes retry logics + logger.info("*" * 25 + " Starting [mitigation agent] for [mitigation] " + "*" * 25) + mitigation_agent_exec_stats = await mitigation_task_main(localization_fault_summary) + agent_names.extend(mitigation_agent_exec_stats["agent_name"]) + agent_in_tokens.extend(mitigation_agent_exec_stats["input_tokens"]) + agent_out_tokens.extend(mitigation_agent_exec_stats["output_tokens"]) + agent_total_tokens.extend(mitigation_agent_exec_stats["total_tokens"]) + agent_times.extend(mitigation_agent_exec_stats["time"]) + agent_steps.extend(mitigation_agent_exec_stats["steps"]) + agent_retry_attempts.extend(mitigation_agent_exec_stats["num_retry_attempts"]) + agent_rollback_stack.extend(mitigation_agent_exec_stats["rollback_stack"]) + agent_oracle_results.extend(mitigation_agent_exec_stats["oracle_results"]) + logger.info("*" * 25 + " Finished [mitigation agent] " + "*" * 25) + + for lst in [ + agent_names, + agent_in_tokens, + agent_out_tokens, + agent_total_tokens, + agent_times, + agent_steps, + agent_retry_attempts, + agent_rollback_stack, + agent_oracle_results, + ]: + logger.info("list length: " + str(len(lst))) + + agent_output_df["agent_name"] = agent_names + agent_output_df["input_tokens"] = agent_in_tokens + agent_output_df["output_tokens"] = agent_out_tokens + agent_output_df["total_tokens"] = agent_total_tokens + agent_output_df["time"] = agent_times + agent_output_df["steps"] = agent_steps + agent_output_df["num_retry_attempts"] = agent_retry_attempts + agent_output_df["rollback_stack"] = agent_rollback_stack + agent_output_df["oracle_results"] = agent_oracle_results + current_datetime = get_current_datetime_formatted() + agent_output_df.to_csv(f"./{current_datetime}_{current_problem}_stratus_output.csv", index=False, header=True) + logger.info("*" * 25 + f" Finished Testing {current_problem} ! " + "*" * 25) + logger.info("*" * 25 + f" Finished Testing {current_problem} ! " + "*" * 25) + logger.info("*" * 25 + f" Finished Testing {current_problem} ! 
" + "*" * 25) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/clients/stratus/stratus_agent/localization_agent.py b/clients/stratus/stratus_agent/localization_agent.py new file mode 100644 index 0000000..e733a84 --- /dev/null +++ b/clients/stratus/stratus_agent/localization_agent.py @@ -0,0 +1,79 @@ +import asyncio +import logging +from pathlib import Path + +import yaml + +from clients.stratus.llm_backend.init_backend import get_llm_backend_for_tools +from clients.stratus.stratus_agent.diagnosis_agent import DiagnosisAgent +from clients.stratus.stratus_utils.get_starting_prompt import get_starting_prompts +from clients.stratus.stratus_utils.str_to_tool import str_to_tool + +logger = logging.getLogger("all.stratus.localization") +logger.propagate = True +logger.setLevel(logging.DEBUG) + + +def build_default_localization_agent(): + file_parent_dir = Path(__file__).resolve().parent + localization_agent_config_path = file_parent_dir.parent / "configs" / "localization_agent_config.yaml" + localization_agent_config = yaml.safe_load(open(localization_agent_config_path, "r")) + max_step = localization_agent_config["max_step"] + prompt_path = file_parent_dir.parent / "configs" / localization_agent_config["prompts_path"] + sync_tools = [] + async_tools = [] + tool_descriptions = "" + if localization_agent_config["sync_tools"] is not None: + for sync_tool_struct in localization_agent_config["sync_tools"]: + sync_tools.append(str_to_tool(sync_tool_struct)) + tool_descriptions += ( + f"tool name: {sync_tool_struct["name"]}" + + "\n\n" + + f"tool descriptions {sync_tool_struct["description"]}" + + "\n\n" + ) + else: + sync_tools = None + if localization_agent_config["async_tools"] is not None: + for async_tool_struct in localization_agent_config["async_tools"]: + async_tools.append(str_to_tool(async_tool_struct)) + tool_descriptions += ( + f"tool name: {async_tool_struct["name"]}" + + "\n\n" + + f"tool description: {async_tool_struct["description"]}" + + "\n\n" + ) + else: + async_tools = None + + submit_tool = str_to_tool( + { + "name": "submit_tool", + "description": """ + The tool to submit benchmark results + + Args: + ans (str): Use natural language to describe the root cause of the failure. 
+ """, + } + ) + + agent = DiagnosisAgent( + llm=get_llm_backend_for_tools(), + max_step=max_step, + sync_tools=sync_tools, + async_tools=async_tools, + submit_tool=submit_tool, + tool_descs=tool_descriptions, + ) + agent.build_agent() + agent.save_agent_graph_to_png() + return agent, prompt_path, max_step + + +async def single_run_with_predefined_prompts(init_prompts): + agent, prompt_path, max_step = build_default_localization_agent() + res = await agent.arun(init_prompts) + logger.info("Clearing agent's memory") + agent.clear_memory() + return agent, res diff --git a/clients/stratus/stratus_agent/mitigation_agent.py b/clients/stratus/stratus_agent/mitigation_agent.py new file mode 100644 index 0000000..aab1b50 --- /dev/null +++ b/clients/stratus/stratus_agent/mitigation_agent.py @@ -0,0 +1,233 @@ +import asyncio +from pathlib import Path +from typing import List +import logging +import yaml +from langchain_core.callbacks import UsageMetadataCallbackHandler +from langchain_core.messages import HumanMessage, SystemMessage +from langgraph.checkpoint.memory import MemorySaver +from langgraph.constants import END, START +from langgraph.types import StateSnapshot + +from clients.stratus.llm_backend.init_backend import get_llm_backend_for_tools +from clients.stratus.stratus_agent.base_agent import BaseAgent +from clients.stratus.stratus_agent.state import State +from clients.stratus.stratus_utils.str_to_tool import str_to_tool +from clients.stratus.tools.stratus_tool_node import StratusToolNode + +logger = logging.getLogger("all.stratus.mitigation") +logger.propagate = True +logger.setLevel(logging.DEBUG) + + +class MitigationAgent(BaseAgent): + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.tool_node = None + self.max_step = kwargs.get("max_step", 20) + self.loop_count = 0 + self.local_logger = logging.getLogger("all.stratus.mitigation") + + def build_agent(self): + self.tool_node = StratusToolNode(async_tools=self.async_tools, sync_tools=self.sync_tools) + + self.graph_builder.add_node(self.thinking_prompt_inject_node, self.llm_thinking_prompt_inject_step) + self.graph_builder.add_node(self.tool_calling_prompt_inject_node, self.llm_tool_call_prompt_inject_step) + self.graph_builder.add_node(self.thinking_node, self.llm_thinking_step) + self.graph_builder.add_node(self.tool_calling_node, self.llm_tool_call_step) + self.graph_builder.add_node(self.process_tool_call_node, self.tool_node) + self.graph_builder.add_node(self.post_round_process_node, self.post_round_process) + self.graph_builder.add_node(self.force_submit_prompt_inject_node, self.llm_force_submit_thinking_step) + self.graph_builder.add_node(self.force_submit_tool_call_node, self.llm_force_submit_tool_call_step) + + self.graph_builder.add_edge(START, self.thinking_prompt_inject_node) + self.graph_builder.add_edge(self.thinking_prompt_inject_node, self.thinking_node) + self.graph_builder.add_edge(self.thinking_node, self.tool_calling_prompt_inject_node) + self.graph_builder.add_edge(self.tool_calling_prompt_inject_node, self.tool_calling_node) + self.graph_builder.add_edge(self.tool_calling_node, self.process_tool_call_node) + self.graph_builder.add_edge(self.process_tool_call_node, self.post_round_process_node) + self.graph_builder.add_conditional_edges( + self.process_tool_call_node, + self.should_submit_router, + { + self.force_submit_prompt_inject_node: self.force_submit_prompt_inject_node, + self.post_round_process_node: self.post_round_process_node, + }, + ) + # TODO: Before submitting, run oracle to see if 
really mitigated. + self.graph_builder.add_edge(self.force_submit_prompt_inject_node, self.force_submit_tool_call_node) + self.graph_builder.add_edge(self.force_submit_tool_call_node, END) + self.graph_builder.add_edge(self.post_round_process_node, END) + + self.memory_saver = MemorySaver() + self.graph = self.graph_builder.compile(checkpointer=self.memory_saver) + + async def arun(self, starting_prompts): + """ + Async running an agent + + Args: + starting_prompts (dict): The data inside the dict will be filled into the prompts. + + Returns: + final state of the agent running, including messages and other state values. + """ + if not self.graph: + raise ValueError("Agent graph is None. Have you built the agent?") + + if len(starting_prompts) == 0: + raise ValueError("No prompts used to start the conversation!") + + # Log starting prompts in full to arena logger + all_init_prompts = "" + for prompt in starting_prompts: + all_init_prompts += prompt.content + "\n" + self.arena_logger.info(f"[PROMPT] \n {all_init_prompts}") + + graph_events = [] + while True: + graph_config = {"configurable": {"thread_id": "1"}} + logger.info(f"{'-' * 20} [Loop {self.loop_count}] {'-' * 20}") + last_state = self.graph.get_state(config=graph_config) + if len(last_state.values) != 0: + logger.debug(f"[Loop {self.loop_count}] There were last {len(last_state.values)} states.") + # this is all the previous msgs the agent had, we just inherit them in the next graph traversal + state = last_state.values + else: + logger.debug(f"[Loop {self.loop_count}] There were no states.") + # fresh agent start, init state here + state = { + "messages": starting_prompts, + # "workdir": "", + # "curr_file": "", + # "curr_line": 0, + "num_steps": 0, + # "rec_submission_rounds": 0, + # "submit_tried": False, + "submitted": False, + # "ans": dict(), + "rollback_stack": "", + } + + async for event in self.graph.astream( + state, + # recursion_limit could be as large as possible as we have our own limit. 
+ config={"recursion_limit": 10000, "configurable": {"thread_id": "1"}, "callbacks": [self.callback]}, + stream_mode="values", + ): + if (not graph_events) or event["messages"] != graph_events[-1]["messages"]: + event["messages"][-1].pretty_print() + graph_events.append(event) + last_state = self.graph.get_state(config=graph_config) + if last_state.values["submitted"]: + logger.info(f"[Loop {self.loop_count}] Agent submitted, breaking loop.") + break + + self.loop_count += 1 + + return last_state + + +def build_default_mitigation_agent(): + # agent config and init setup + file_parent_dir = Path(__file__).resolve().parent + mitigation_agent_config_path = file_parent_dir.parent / "configs" / "mitigation_agent_config.yaml" + mitigation_agent_config = yaml.safe_load(open(mitigation_agent_config_path, "r")) + mitigation_agent_max_step = mitigation_agent_config["max_step"] + mitigation_agent_prompt_path = file_parent_dir.parent / "configs" / mitigation_agent_config["prompts_path"] + + mitigation_agent_sync_tools = [] + mitigation_agent_async_tools = [] + mitigation_agent_tool_descriptions = "" + if mitigation_agent_config["sync_tools"] is not None: + for sync_tool_struct in mitigation_agent_config["sync_tools"]: + mitigation_agent_sync_tools.append(str_to_tool(sync_tool_struct)) + mitigation_agent_tool_descriptions += ( + f"tool name: {sync_tool_struct["name"]}" + + "\n\n" + + f"tool descriptions {sync_tool_struct["description"]}" + + "\n\n" + ) + else: + mitigation_agent_sync_tools = None + if mitigation_agent_config["async_tools"] is not None: + for async_tool_struct in mitigation_agent_config["async_tools"]: + mitigation_agent_async_tools.append(str_to_tool(async_tool_struct)) + mitigation_agent_tool_descriptions += ( + f"tool name: {async_tool_struct["name"]}" + + "\n\n" + + f"tool description: {async_tool_struct["description"]}" + + "\n\n" + ) + else: + mitigation_agent_async_tools = None + + submit_tool = str_to_tool( + { + "name": "submit_tool", + "description": """ + The tool to submit benchmark results + + Args: + ans (str): the answer you would like to submit to the benchmark + """, + } + ) + + # defining mitigation agent + mitigation_agent = MitigationAgent( + llm=get_llm_backend_for_tools(), + max_step=mitigation_agent_max_step, + sync_tools=mitigation_agent_sync_tools, + async_tools=mitigation_agent_async_tools, + submit_tool=submit_tool, + tool_descs=mitigation_agent_tool_descriptions, + ) + mitigation_agent.build_agent() + mitigation_agent.save_agent_graph_to_png() + return mitigation_agent, mitigation_agent_prompt_path, mitigation_agent_max_step + + +def generate_run_summary(last_state: StateSnapshot, summary_system_prompt) -> str: + """ + Returns a SystemMessage and a HumanMessage as a list. They are summaries and reflections of a given last run + `last_state`. + Ideally, we only need to summarize the last 20 (or all of them if less than 20) messages from the agent + + Args: + last_state (State): the state from last run + Returns: + a list of SystemMessage and HumanMessage representing the reflections + """ + llm = get_llm_backend_for_tools() + logger.info("asking LLM to summarize and reflect last run") + last_run_msgs = last_state.values.get("messages", None) + summary_input_messages = [ + SystemMessage(summary_system_prompt), + HumanMessage(f"Here are the list of messages happened in the last conversation. 
\n\n {last_run_msgs}"), + ] + if last_run_msgs is None: + raise RuntimeError("StateSnapshot must contain messages!") + res = llm.inference(summary_input_messages) + res = res.content + return res + + +async def single_run_with_predefined_prompts(init_prompts): + agent, prompt_path, max_step = build_default_mitigation_agent() + res = await agent.arun(init_prompts) + logger.info("Clearing agent's memory") + agent.clear_memory() + return agent, res + + +async def retry_run_with_feedback(feedback_prompts): + agent, prompt_path, max_step = build_default_mitigation_agent() + res = await agent.arun(feedback_prompts) + logger.info("Clearing agent's memory") + agent.clear_memory() + return agent, res + + +if __name__ == "__main__": + logger.info("Mitigation agent does not support running as a module.") diff --git a/clients/stratus/stratus_agent/rollback_agent.py b/clients/stratus/stratus_agent/rollback_agent.py new file mode 100644 index 0000000..817a31a --- /dev/null +++ b/clients/stratus/stratus_agent/rollback_agent.py @@ -0,0 +1,125 @@ +import asyncio +import logging +from pathlib import Path + +import yaml +from langchain_core.callbacks import UsageMetadataCallbackHandler +from langgraph.checkpoint.memory import MemorySaver +from langgraph.constants import END +from langgraph.graph import START + +from clients.stratus.llm_backend.init_backend import get_llm_backend_for_tools +from clients.stratus.stratus_agent.base_agent import BaseAgent +from clients.stratus.stratus_utils.get_starting_prompt import get_starting_prompts +from clients.stratus.stratus_utils.str_to_tool import str_to_tool +from clients.stratus.tools.stratus_tool_node import StratusToolNode + +logger = logging.getLogger("all.stratus.rollback") +logger.propagate = True +logger.setLevel(logging.DEBUG) + + +class RollbackAgent(BaseAgent): + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.tool_node = None + self.loop_count = 0 + self.local_logger = logging.getLogger("all.stratus.rollback") + + def build_agent(self): + self.tool_node = StratusToolNode( + async_tools=self.async_tools, + sync_tools=self.sync_tools, + ) + + self.graph_builder.add_node(self.thinking_prompt_inject_node, self.llm_thinking_prompt_inject_step) + self.graph_builder.add_node(self.tool_calling_prompt_inject_node, self.llm_tool_call_prompt_inject_step) + self.graph_builder.add_node(self.thinking_node, self.llm_thinking_step) + self.graph_builder.add_node(self.tool_calling_node, self.llm_tool_call_step) + self.graph_builder.add_node(self.process_tool_call_node, self.tool_node) + self.graph_builder.add_node(self.post_round_process_node, self.post_round_process) + self.graph_builder.add_node(self.force_submit_tool_call_node, self.llm_force_submit_tool_call_node) + + self.graph_builder.add_edge(START, self.thinking_prompt_inject_node) + self.graph_builder.add_edge(self.thinking_prompt_inject_node, self.thinking_node) + self.graph_builder.add_edge(self.thinking_node, self.tool_calling_prompt_inject_node) + self.graph_builder.add_edge(self.tool_calling_prompt_inject_node, self.tool_calling_node) + self.graph_builder.add_edge(self.tool_calling_node, self.process_tool_call_node) + self.graph_builder.add_edge(self.process_tool_call_node, self.post_round_process_node) + self.graph_builder.add_conditional_edges( + self.process_tool_call_node, + self.should_submit_router, + { + self.force_submit_tool_call_node: self.force_submit_tool_call_node, + self.post_round_process_node: self.post_round_process_node, + }, + ) + 
self.graph_builder.add_edge(self.force_submit_tool_call_node, END) + self.graph_builder.add_edge(self.post_round_process_node, END) + + self.memory_saver = MemorySaver() + self.graph = self.graph_builder.compile(checkpointer=self.memory_saver) + + +async def main(): + file_parent_dir = Path(__file__).resolve().parent + rollback_agent_config_path = file_parent_dir.parent / "configs" / "rollback_agent_config.yaml" + rollback_agent_config = yaml.safe_load(open(rollback_agent_config_path, "r")) + max_step = rollback_agent_config["max_step"] + prompt_path = file_parent_dir.parent / "configs" / rollback_agent_config["prompts_path"] + sync_tools = [] + async_tools = [] + tool_descriptions = "" + if rollback_agent_config["sync_tools"] is not None: + for sync_tool_struct in rollback_agent_config["sync_tools"]: + sync_tools.append(str_to_tool(sync_tool_struct)) + tool_descriptions += ( + f"tool name: {sync_tool_struct["name"]}" + + "\n\n" + + f"tool descriptions {sync_tool_struct["description"]}" + + "\n\n" + ) + else: + sync_tools = None + if rollback_agent_config["async_tools"] is not None: + for async_tool_struct in rollback_agent_config["async_tools"]: + async_tools.append(str_to_tool(async_tool_struct)) + tool_descriptions += ( + f"tool name: {async_tool_struct["name"]}" + + "\n\n" + + f"tool description: {async_tool_struct["description"]}" + + "\n\n" + ) + else: + async_tools = None + + submit_tool = str_to_tool( + { + "name": "submit_tool", + "description": """ + The tool to submit benchmark results + + Args: + ans (str): the answer you would like to submit to the benchmark + """, + } + ) + + agent = RollbackAgent( + llm=get_llm_backend_for_tools(), + max_step=max_step, + sync_tools=sync_tools, + async_tools=async_tools, + submit_tool=submit_tool, + tool_descs=tool_descriptions, + ) + agent.build_agent() + agent.save_agent_graph_to_png() + + res = await agent.arun(get_starting_prompts(prompt_path, max_step=max_step)) + agent.clear_memory() + return agent, res + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/clients/stratus/stratus_agent/state.py b/clients/stratus/stratus_agent/state.py new file mode 100644 index 0000000..2e6d119 --- /dev/null +++ b/clients/stratus/stratus_agent/state.py @@ -0,0 +1,21 @@ +from langgraph.graph import add_messages +from typing_extensions import Annotated, TypedDict + + +class State(TypedDict): + # Messages have the type "list". 
The `add_messages` function + # in the annotation defines how this state key should be updated + # (in this case, it appends messages to the list, rather than overwriting them) + messages: Annotated[list, add_messages] + # workdir: str + # curr_file: str + # curr_line: int + # number or rounds used to finish assigned tasks + # num_rounds: int + num_steps: int + # number of rounds used for rectifying submission + # rec_submission_rounds: int + submitted: bool + # submit_tried: bool + # ans: dict + rollback_stack: str diff --git a/clients/stratus/stratus_utils/__init__.py b/clients/stratus/stratus_utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/clients/stratus/stratus_utils/ai_msg_mock_utils.py b/clients/stratus/stratus_utils/ai_msg_mock_utils.py new file mode 100644 index 0000000..78a378a --- /dev/null +++ b/clients/stratus/stratus_utils/ai_msg_mock_utils.py @@ -0,0 +1,53 @@ +ai_msg_tpl = { + "content": "", + "additional_kwargs": { + "tool_calls": [ + { + "id": "call_osNIUg8kE7psP360dHinqNbm", + "function": { + "arguments": "", + "name": "", + }, + "type": "function", + } + ], + "refusal": None, + }, + "response_metadata": { + "token_usage": { + "completion_tokens": 39, + "prompt_tokens": 588, + "total_tokens": 627, + "completion_tokens_details": { + "accepted_prediction_tokens": 0, + "audio_tokens": 0, + "reasoning_tokens": 0, + "rejected_prediction_tokens": 0, + }, + "prompt_tokens_details": {"audio_tokens": 0, "cached_tokens": 0}, + }, + "model_name": "gpt-4o-2024-08-06", + "system_fingerprint": "fp_07871e2ad8", + "service_tier": "default", + "finish_reason": "tool_calls", + "logprobs": None, + }, + "tool_calls": [ + { + "name": "", + "args": { + "path": "", + "line_number": "", + }, + "id": "call_osNIUg8kE7psP360dHinqNbm", + "type": "tool_call", + } + ], + "usage_metadata": { + "input_tokens": 588, + "output_tokens": 39, + "total_tokens": 627, + "input_token_details": {"audio": 0, "cache_read": 0}, + "output_token_details": {"audio": 0, "reasoning": 0}, + }, +} diff --git a/clients/stratus/stratus_utils/get_logger.py b/clients/stratus/stratus_utils/get_logger.py new file mode 100644 index 0000000..8c7b8be --- /dev/null +++ b/clients/stratus/stratus_utils/get_logger.py @@ -0,0 +1,7 @@ +import logging + + +def get_logger(): + logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") + logger = logging.getLogger(__name__) + return logger diff --git a/clients/stratus/stratus_utils/get_starting_prompt.py b/clients/stratus/stratus_utils/get_starting_prompt.py new file mode 100644 index 0000000..99a9d46 --- /dev/null +++ b/clients/stratus/stratus_utils/get_starting_prompt.py @@ -0,0 +1,16 @@ +import yaml +from langchain_core.messages import AIMessage, HumanMessage, SystemMessage + + +def get_starting_prompts(prompt_path, max_step): + with open(prompt_path, "r") as prompt_file: + prompts = yaml.safe_load(prompt_file) + sys_prompt = prompts["system"] + user_prompt = prompts["user"].format(max_step=max_step) + prompts = [] + if sys_prompt: + prompts.append(SystemMessage(sys_prompt)) + if user_prompt: + prompts.append(HumanMessage(user_prompt)) + + return prompts diff --git a/clients/stratus/stratus_utils/str_to_tool.py b/clients/stratus/stratus_utils/str_to_tool.py new file mode 100644 index 0000000..4fd00f6 --- /dev/null +++ b/clients/stratus/stratus_utils/str_to_tool.py @@ -0,0 +1,72 @@ +"""Converts tools in str into tool objects""" + +import os +import uuid + +from fastmcp import Client +from fastmcp.client import 
SSETransport +from langchain_core.tools import BaseTool + +from clients.stratus.stratus_utils.get_logger import get_logger +from clients.stratus.tools.jaeger_tools import get_dependency_graph, get_operations, get_services, get_traces +from clients.stratus.tools.kubectl_tools import ( + ExecKubectlCmdSafely, + ExecReadOnlyKubectlCmd, + GetPreviousRollbackableCmd, + RollbackCommand, +) +from clients.stratus.tools.localization import get_resource_uid +from clients.stratus.tools.prometheus_tools import get_metrics +from clients.stratus.tools.submit_tool import fake_submit_tool, rollback_submit_tool, submit_tool +from clients.stratus.tools.wait_tool import wait_tool + +logger = get_logger() + + +def get_client(): + session_id = str(uuid.uuid4()) + transport = SSETransport( + url=f"{os.getenv("MCP_SERVER_URL", "http://localhost:9954")}/kubectl_mcp_tools/sse", + headers={"sregym_ssid": session_id}, + ) + client = Client(transport) + return client + + +def str_to_tool(tool_struct: dict[str, str]): + if tool_struct["name"] == "get_traces": + return get_traces + elif tool_struct["name"] == "get_services": + return get_services + elif tool_struct["name"] == "get_operations": + return get_operations + elif tool_struct["name"] == "get_dependency_graph": + return get_dependency_graph + elif tool_struct["name"] == "get_metrics": + return get_metrics + elif tool_struct["name"] == "get_resource_uid": + return get_resource_uid + elif tool_struct["name"] == "submit_tool": + return submit_tool + elif tool_struct["name"] == "f_submit_tool": + return fake_submit_tool + elif tool_struct["name"] == "r_submit_tool": + return rollback_submit_tool + elif tool_struct["name"] == "wait_tool": + return wait_tool + elif tool_struct["name"] == "exec_read_only_kubectl_cmd": + client = get_client() + exec_read_only_kubectl_cmd = ExecReadOnlyKubectlCmd(client) + return exec_read_only_kubectl_cmd + elif tool_struct["name"] == "exec_kubectl_cmd_safely": + client = get_client() + exec_kubectl_cmd_safely = ExecKubectlCmdSafely(client) + return exec_kubectl_cmd_safely + elif tool_struct["name"] == "rollback_command": + client = get_client() + rollback_command = RollbackCommand(client) + return rollback_command + elif tool_struct["name"] == "get_previous_rollbackable_cmd": + client = get_client() + get_previous_rollbackable_cmd = GetPreviousRollbackableCmd(client) + return get_previous_rollbackable_cmd diff --git a/clients/stratus/stratus_utils/truncate_by_token.py b/clients/stratus/stratus_utils/truncate_by_token.py new file mode 100644 index 0000000..628af74 --- /dev/null +++ b/clients/stratus/stratus_utils/truncate_by_token.py @@ -0,0 +1,23 @@ +import tiktoken + + +def truncate_to_tokens(text: str, max_tokens: int = 6000, model: str = "gpt-4o-mini"): + try: + enc = tiktoken.encoding_for_model(model) + except KeyError: + # Fallback that works for most modern OpenAI chat models + enc = tiktoken.get_encoding("cl100k_base") + + tokens = enc.encode(text) + if len(tokens) <= max_tokens: + return text, len(tokens) + + # Truncate and decode back to string + truncated_text = enc.decode(tokens[:max_tokens]) + + # Optional safety pass to ensure token count is <= max_tokens after decoding + retokens = enc.encode(truncated_text) + if len(retokens) > max_tokens: + truncated_text = enc.decode(retokens[:max_tokens]) + + return truncated_text diff --git a/clients/stratus/tools/__init__.py b/clients/stratus/tools/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/clients/stratus/tools/basic_tool_node.py 
b/clients/stratus/tools/basic_tool_node.py new file mode 100644 index 0000000..a15a00d --- /dev/null +++ b/clients/stratus/tools/basic_tool_node.py @@ -0,0 +1,43 @@ +import asyncio +import logging + +from langchain_core.messages import ToolMessage +from langchain_core.tools import BaseTool + +logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") +logger = logging.getLogger(__name__) + + +class BasicToolNode: + """A node that runs the tools requested in the last AIMessage.""" + + def __init__(self, node_tools: list[BaseTool], is_async: bool) -> None: + self.tools_by_name = {t.name: t for t in node_tools} + self.is_async = is_async + + async def __call__(self, inputs: dict): + if messages := inputs.get("messages", []): + message = messages[-1] + else: + raise ValueError("No message found in input") + logger.info(f"BasicToolNode: {message}") + outputs = [] + for tool_call in message.tool_calls: + # tool_call["args"]["tool_call_id"] = tool_call["id"] + # tool_call["args"].pop("id", None) + logger.info(f"invoking tool: {tool_call["name"]}, tool_call: {tool_call}") + if self.is_async: + tool_call["args"].update({"state": inputs}) + tool_result = await self.tools_by_name[tool_call["name"]].ainvoke(tool_call) + tool_call["args"].pop("state", None) + else: + tool_result = self.tools_by_name[tool_call["name"]].invoke(tool_call["args"]) + logger.info(f"tool_result: {tool_result}") + outputs.append( + ToolMessage( + content=tool_result, + name=tool_call["name"], + tool_call_id=tool_call["id"], + ) + ) + return {"messages": outputs} diff --git a/clients/stratus/tools/compile/compile_tool.py b/clients/stratus/tools/compile/compile_tool.py new file mode 100644 index 0000000..1b9df41 --- /dev/null +++ b/clients/stratus/tools/compile/compile_tool.py @@ -0,0 +1,59 @@ +import logging +import os.path +import subprocess +from pathlib import Path +from typing import Annotated + +from langchain_core.messages import ToolMessage +from langchain_core.tools import InjectedToolCallId, tool +from langgraph.prebuilt import InjectedState + +from clients.stratus.tools.text_editing.flake8_utils import flake8, format_flake8_output # type: ignore +from clients.stratus.tools.text_editing.windowed_file import ( # type: ignore + FileNotOpened, + TextNotFound, + WindowedFile, +) + + +@tool("compile_postgresql_server", description="Compile PostgreSQL server code") +def compile_postgresql_server( + tool_call_id: Annotated[str, InjectedToolCallId] = "", + state: Annotated[dict, InjectedState] = None, +) -> str: + """Compile PostgreSQL server code.""" + logger = logging.getLogger(__name__) + logger.info("Compiling PostgreSQL server code...") + logger.info(f"State: {state}") + + workdir = Path(state.get("workdir", "")).resolve() + logger.info(f"Work directory: {workdir}") + + if not workdir.exists(): + return f"Work directory {workdir} does not exist. Please set the workdir in the state." + + env = os.environ.copy() + env["PATH"] = str(Path.home() / "pgsql/bin") + ":" + env["PATH"] + homedir = str(Path.home()) + logger.info(f"Home directory: {homedir}") + + if not workdir.exists(): + return f"Work directory {workdir} does not exist. Please set the workdir in the state." 
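+ # NOTE: this `workdir.exists()` guard repeats the identical check a few lines above and could likely be dropped.
+ # The commands below perform a standard source build and smoke test: ./configure (prefixed to {workdir}/pgsql, without ICU), make, make install, initdb a fresh data directory, start the server with pg_ctl, create a `test` database, and run `psql -c '\l'` to confirm the server is reachable.
+ # Also note that configure installs under the work directory while the later steps invoke binaries from {homedir}/pgsql/bin, which only lines up when the work directory is the home directory.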
+ + cmds = [ + f"./configure --prefix={workdir}/pgsql --without-icu", + "make > /dev/null 2>&1", + "make install > /dev/null 2>&1", + f"{homedir}/pgsql/bin/initdb -D {homedir}/pgsql/data2", + f"{homedir}/pgsql/bin/pg_ctl -D {homedir}/pgsql/data2 -l logfile start", + f"{homedir}/pgsql/bin/createdb test", + f"{homedir}/pgsql/bin/psql -d test -c '\\l'", + ] + + output = "" + for cmd in cmds: + process = subprocess.run(cmd, cwd=workdir, capture_output=True, shell=True, text=True, env=env) + output += f"$ {cmd}\n{process.stdout}\n{process.stderr}\n" + logger.info(f"Command: {cmd}") + logger.info(f"Output: {process.stdout}") + return ToolMessage(tool_call_id=tool_call_id, content=output) diff --git a/clients/stratus/tools/jaeger_tools.py b/clients/stratus/tools/jaeger_tools.py new file mode 100644 index 0000000..b4d9ddc --- /dev/null +++ b/clients/stratus/tools/jaeger_tools.py @@ -0,0 +1,248 @@ +import logging +from contextlib import AsyncExitStack +from typing import Annotated + +from langchain_core.messages import HumanMessage, SystemMessage, ToolMessage +from langchain_core.tools import InjectedToolCallId, tool +from langgraph.types import Command +from mcp import ClientSession +from mcp.client.sse import sse_client + +from clients.stratus.configs.langgraph_tool_configs import LanggraphToolConfig +from clients.stratus.llm_backend.init_backend import get_llm_backend_for_tools +from clients.stratus.stratus_utils.truncate_by_token import truncate_to_tokens +from clients.stratus.tools.text_editing.flake8_utils import flake8, format_flake8_output # type: ignore +from clients.stratus.tools.text_editing.windowed_file import ( # type: ignore + FileNotOpened, + TextNotFound, + WindowedFile, +) + +logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") +logger = logging.getLogger("all.stratus.tools.jaeger") + +langgraph_tool_config = LanggraphToolConfig() + +get_traces_docstring = """Get Jaeger traces for a given service in the last n minutes. + + Args: + service (str): The name of the service for which to retrieve trace data. + last_n_minutes (int): The time range (in minutes) to look back from the current time. +""" + + +@tool(description=get_traces_docstring) +async def get_traces(service: str, last_n_minutes: int, tool_call_id: Annotated[str, InjectedToolCallId]) -> Command: + + logging.info(f"Getting traces for service {service} in the last {last_n_minutes} minutes") + + exit_stack = AsyncExitStack() + logger.info("Using HTTP, connecting to server.") + server_url = langgraph_tool_config.jaeger_mcp_url + http_transport = await exit_stack.enter_async_context(sse_client(url=server_url)) + session = await exit_stack.enter_async_context(ClientSession(*http_transport)) + + await session.initialize() + + result = await session.call_tool( + "get_traces", + arguments={ + "service": service, + "last_n_minutes": last_n_minutes, + }, + ) + await exit_stack.aclose() + result = result.content[0].text + # if langgraph_tool_config.use_summaries and len(traces) >= langgraph_tool_config.min_len_to_sum: + # logger.info("Using summaries for traces.") + # traces = _summarize_traces(traces) + result = truncate_to_tokens(result) + return Command( + update={ + "messages": [ + ToolMessage( + content=str(result), + tool_call_id=tool_call_id, + ), + ] + } + ) + + +def _summarize_traces(traces): + logger.info("=== _summarize_traces called ===") + + system_prompt = """ + You are a tool for a Site Reliability Engineering team. 
Currently, the team faces an incident in the cluster and needs to fix it ASAP. + Your job is to analyze and summarize given microservice traces, given in format of dictionaries. + Read the given traces. Summarize the traces. Analyze what could be the root cause of the incident. + Be succinct and concise. Include important traces that reflects the root cause of the incident in format of raw traces as strings, no need to prettify the json. + DO NOT truncate the traces. + + Return your response in this format: + SERVICE NAME: + SUMMARY: + + STRICTLY FOLLOW THIS FORMAT + + """ + llm = get_llm_backend_for_tools() + messages = [ + SystemMessage(content=system_prompt), + HumanMessage(content=traces), + ] + + traces_summary = llm.inference(messages=messages) + logger.info(f"Traces summary: {traces_summary}") + return traces_summary + + +def _summarize_operations(operations): + logger.info("=== _summarize_operations called ===") + + system_prompt = """ + You are a tool for a Site Reliability Engineering team. Currently, the team faces an incident in the cluster and needs to fix it ASAP. + Your job is to analyze and summarize given microservice operations, given in format of dictionaries. + Read the given operations. Summarize the operations. Analyze what could be the root cause of the incident. + Be succinct and concise. + + Return your response in this format: + SERVICE NAME: + SUMMARY: + + STRICTLY FOLLOW THIS FORMAT + + """ + llm = get_llm_backend_for_tools() + messages = [ + SystemMessage(content=system_prompt), + HumanMessage(content=operations), + ] + + operations_summary = llm.inference(messages=messages) + logger.info(f"Operations summary: {operations_summary}") + return operations_summary + + +get_services_docstring = """ +Retrieve the list of service names from the Grafana instance. + + Args: + + Returns: + List[str]: A list of service names available in Grafana. +""" + + +@tool(description=get_services_docstring) +async def get_services(tool_call_id: Annotated[str, InjectedToolCallId]) -> Command: + + logger.info(f"calling mcp get_services from langchain get_services") + exit_stack = AsyncExitStack() + logger.info("Using HTTP, connecting to server.") + server_url = langgraph_tool_config.jaeger_mcp_url + http_transport = await exit_stack.enter_async_context(sse_client(url=server_url)) + session = await exit_stack.enter_async_context(ClientSession(*http_transport)) + + await session.initialize() + + result = await session.call_tool("get_services") + await exit_stack.aclose() + # services = result.content[0].text + logger.debug(f"Result from get_services mcp tools: f{result}") + return Command( + update={ + "messages": [ + ToolMessage( + content=result, + tool_call_id=tool_call_id, + ), + ] + } + ) + + +get_operations_docstring = """ +Query available operations for a specific service from the Grafana instance. + + Args: + service (str): The name of the service whose operations should be retrieved. + + Returns: + List[str]: A list of operation names associated with the specified service. 
+""" + + +@tool(description=get_operations_docstring) +async def get_operations( + service: str, + tool_call_id: Annotated[str, InjectedToolCallId], +) -> Command: + + logger.info(f"calling mcp get_operations from langchain get_operations with service {service}") + exit_stack = AsyncExitStack() + logger.info("Using HTTP, connecting to server.") + server_url = langgraph_tool_config.jaeger_mcp_url + http_transport = await exit_stack.enter_async_context(sse_client(url=server_url)) + session = await exit_stack.enter_async_context(ClientSession(*http_transport)) + + await session.initialize() + + result = await session.call_tool( + "get_operations", + arguments={"service": service}, + ) + await exit_stack.aclose() + # operations = result.content[0].text + # if langgraph_tool_config.use_summaries and len(operations) >= langgraph_tool_config.min_len_to_sum: + # logger.info("Using summaries for operations.") + # operations = _summarize_operations(operations) + return Command( + update={ + "messages": [ + ToolMessage(content=result, tool_call_id=tool_call_id), + ] + } + ) + + +get_dependency_graph_docstring = """ + Get service dependency graph from Jaeger's native dependencies API. + Args: + last_n_minutes (int): The time range (in minutes) to look back from the current time. + Returns: + str: JSON object representing the dependency graph. +""" + + +@tool(description=get_dependency_graph_docstring) +async def get_dependency_graph( + last_n_minutes: str, + tool_call_id: Annotated[str, InjectedToolCallId], +) -> Command: + + logger.info(f"calling mcp get_dependency_graph from langchain get_dependency_graph") + exit_stack = AsyncExitStack() + logger.info("Using HTTP, connecting to server.") + server_url = langgraph_tool_config.jaeger_mcp_url + http_transport = await exit_stack.enter_async_context(sse_client(url=server_url)) + session = await exit_stack.enter_async_context(ClientSession(*http_transport)) + + await session.initialize() + + result = await session.call_tool( + "get_dependency_graph", + arguments={"last_n_minutes": last_n_minutes}, + ) + await exit_stack.aclose() + # operations = result.content[0].text + # if langgraph_tool_config.use_summaries and len(operations) >= langgraph_tool_config.min_len_to_sum: + # logger.info("Using summaries for operations.") + # operations = _summarize_operations(operations) + return Command( + update={ + "messages": [ + ToolMessage(content=result, tool_call_id=tool_call_id), + ] + } + ) diff --git a/clients/stratus/tools/kubectl_tools.py b/clients/stratus/tools/kubectl_tools.py new file mode 100644 index 0000000..ca7b658 --- /dev/null +++ b/clients/stratus/tools/kubectl_tools.py @@ -0,0 +1,234 @@ +import logging +from typing import Annotated, Any, Optional + +from fastmcp import Client +from langchain_core.messages import ToolMessage +from langchain_core.tools import InjectedToolCallId +from langchain_core.tools.base import ArgsSchema, BaseTool +from langgraph.types import Command +from pydantic import BaseModel, Field, PrivateAttr + +logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") +logger = logging.getLogger("all.stratus.tools") + + +class ExecKubectlCmdSafelyInput(BaseModel): + command: str = Field( + description="The command you want to execute in a CLI to manage a k8s cluster. " + "It should start with 'kubectl'. Converts natural language to kubectl commands and executes them. " + "Can be used to get/describe/edit Kubernetes deployments, services, and other Kubernetes components. 
" + "Only takes one query at a time. Keep queries simple and straight-forward. " + "This tool cannot handle complex mutli-step queries. " + "Remember that most kubectl queries require a namespace name. " + ) + tool_call_id: Annotated[str, InjectedToolCallId] + + +class ExecKubectlCmdSafely(BaseTool): + name: str = "exec_kubectl_cmd_safely" + description: str = "this is a tool used to safely execute kubectl commands." + args_schema: Optional[ArgsSchema] = ExecKubectlCmdSafelyInput + + _client: Client = PrivateAttr() + + def __init__(self, client: Client, **kwargs: Any): + super().__init__(**kwargs) + self._client = client + + def _run(self): + assert False, f"{self.name} is an async method, you are running it as a sync method!" + pass + + async def _arun( + self, + command: str, + tool_call_id: Annotated[str, InjectedToolCallId], + ) -> Command: + logger.debug(f"tool_call_id in {self.name}: {tool_call_id}") + logger.debug( + f"calling mcp exec_kubectl_cmd_safely from " f'langchain exec_kubectl_cmd_safely, with command: "{command}"' + ) + async with self._client: + result = await self._client.call_tool("exec_kubectl_cmd_safely", arguments={"cmd": command}) + text_result = "\n".join([part.text for part in result]) + return Command( + update={ + "messages": [ + ToolMessage(content=text_result, tool_call_id=tool_call_id), + ] + } + ) + + +kubectl_read_only_cmds = [ + "kubectl api-resources", + "kubectl api-version", + # read only if not interactive (interactive commands are prohibited) + "kubectl attach", + "kubectl auth can-i", + "kubectl cluster-info", + "kubectl describe", + "kubectl diff", + "kubectl events", + "kubectl explain", + "kubectl get", + "kubectl logs", + "kubectl options", + "kubectl top", + "kubectl version", + "kubectl config view", + "kubectl config current-context", + "kubectl config get", +] + + +class ExecReadOnlyKubectlCmdInput(BaseModel): + command: str = Field( + description=f"The read-only kubectl command you want to execute in a CLI " + 'to manage a k8s cluster. It should start with "kubectl". ' + f"Available Read-only Commands: {kubectl_read_only_cmds}" + ) + tool_call_id: Annotated[str, InjectedToolCallId] + + +class ExecReadOnlyKubectlCmd(BaseTool): + name: str = "exec_read_only_kubectl_cmd" + description: str = "this is a tool used to execute read-only kubectl commands." + args_schema: Optional[ArgsSchema] = ExecReadOnlyKubectlCmdInput + + _client: Client = PrivateAttr() + + def __init__(self, client: Client, **kwargs: Any): + super().__init__(**kwargs) + self._client = client + + def _run(self): + assert False, f"{self.name} is an async method, you are running it as a sync method!" + pass + + async def _arun( + self, + command: str, + tool_call_id: Annotated[str, InjectedToolCallId], + ) -> Command: + logger.debug(f"tool_call_id in {self.name}: {tool_call_id}") + is_read_only = False + for c in kubectl_read_only_cmds: + if command.startswith(c): + is_read_only = True + break + if not is_read_only: + logger.debug( + f"Agent is trying to exec a non read-only command {command} " f"with tool exec_read_only_kubectl_cmd" + ) + text_result = ( + f"Your command {command} is not a read-only kubectl command. " + f"Available Read-only Commands: {kubectl_read_only_cmds}." + ) + elif command.startswith("kubectl logs -f"): + logger.debug(f"agent calling interactive read-only command") + text_result = ( + f"Your command {command} is an _interactive_ read-only kubectl command. " f"It is not supported!" 
+ ) + else: + logger.debug( + f"calling mcp exec_kubectl_cmd_safely from " + f'langchain exec_read_only_kubectl_cmd, with command: "{command}"' + ) + async with self._client: + result = await self._client.call_tool("exec_kubectl_cmd_safely", arguments={"cmd": command}) + text_result = "\n".join([part.text for part in result]) + return Command( + update={ + "messages": [ + ToolMessage(content=text_result, tool_call_id=tool_call_id), + ] + } + ) + + +class RollbackCommandCmdInput(BaseModel): + tool_call_id: Annotated[str, InjectedToolCallId] + + +class RollbackCommand(BaseTool): + name: str = "rollback_command" + description: str = ( + "Use this function to roll back the last kubectl command " + 'you successfully executed with the "exec_kubectl_cmd_safely" tool.' + ) + args_schema: Optional[ArgsSchema] = RollbackCommandCmdInput + + _client: Client = PrivateAttr() + + def __init__(self, client: Client, **kwargs: Any): + super().__init__(**kwargs) + self._client = client + + def _run(self): + assert False, f"{self.name} is an async method, you are running it as a sync method!" + pass + + async def _arun( + self, + tool_call_id: Annotated[str, InjectedToolCallId], + ) -> Command: + logger.debug(f"tool_call_id in {self.name}: {tool_call_id}") + logger.debug(f"calling langchain rollback_command") + async with self._client: + result = await self._client.call_tool("rollback_command") + text_result = "\n".join([part.text for part in result]) + return Command( + update={ + "rollback_stack": str(text_result), + "messages": [ + ToolMessage(content=text_result, tool_call_id=tool_call_id), + ], + } + ) + + +class GetPreviousRollbackableCmdInput(BaseModel): + tool_call_id: Annotated[str, InjectedToolCallId] + + +class GetPreviousRollbackableCmd(BaseTool): + name: str = "get_previous_rollbackable_cmd" + description: str = ( + "Use this function to get a list of commands you " + "previously executed that could be roll-backed. " + 'When you call "rollback_command" tool multiple times, ' + "you will roll-back previous commands in the order " + "of the returned list." + ) + args_schema: Optional[ArgsSchema] = GetPreviousRollbackableCmdInput + + _client: Client = PrivateAttr() + + def __init__(self, client: Client, **kwargs: Any): + super().__init__(**kwargs) + self._client = client + + def _run(self): + assert False, f"{self.name} is an async method, you are running it as a sync method!" + pass + + async def _arun( + self, + tool_call_id: Annotated[str, InjectedToolCallId], + ) -> Command: + logger.debug(f"tool_call_id in {self.name}: {tool_call_id}") + logger.debug(f"calling langchain get_previous_rollbackable_cmd") + async with self._client: + result = await self._client.call_tool("get_previous_rollbackable_cmd") + if len(result) == 0: + text_result = "There is no previous rollbackable command." 
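+                # An empty result from the MCP call means there are no previously
+                # executed commands left to roll back.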
+ else: + text_result = "\n".join([part.text for part in result]) + return Command( + update={ + "messages": [ + ToolMessage(content=text_result, tool_call_id=tool_call_id), + ] + } + ) diff --git a/clients/stratus/tools/localization.py b/clients/stratus/tools/localization.py new file mode 100644 index 0000000..dc4dc67 --- /dev/null +++ b/clients/stratus/tools/localization.py @@ -0,0 +1,59 @@ +import asyncio +import subprocess +from contextlib import AsyncExitStack +from typing import Annotated + +from langchain_core.messages import ToolMessage +from langchain_core.tools import InjectedToolCallId, tool +from langgraph.types import Command +from mcp import ClientSession +from mcp.client.sse import sse_client + +from clients.stratus.configs.langgraph_tool_configs import LanggraphToolConfig + +langgraph_tool_config = LanggraphToolConfig() + +localization_tool_docstring = """ +Use this tool to retrieve the UID of a specified resource. + + Args: + resource_type (str): The type of the resource (e.g., 'pod', 'service', 'deployment'). + resource_name (str): The name of the resource. + namespace (str): The namespace of the resource. + Returns: + str: The UID of the specified resource. +""" + + +@tool(description=localization_tool_docstring) +async def get_resource_uid( + resource_type: str, + resource_name: str, + namespace: str, + tool_call_id: Annotated[str, InjectedToolCallId], +) -> Command: + exit_stack = AsyncExitStack() + server_url = langgraph_tool_config.submit_mcp_url + http_transport = await exit_stack.enter_async_context(sse_client(url=server_url)) + session = await exit_stack.enter_async_context(ClientSession(*http_transport)) + await session.initialize() + result = await session.call_tool( + "localization", + arguments={ + "resource_type": resource_type, + "resource_name": resource_name, + "namespace": namespace, + }, + ) + await exit_stack.aclose() + uid = result.content[0].text + return Command( + update={ + "messages": [ + ToolMessage( + content=uid, + tool_call_id=tool_call_id, + ), + ] + } + ) diff --git a/clients/stratus/tools/prometheus_tools.py b/clients/stratus/tools/prometheus_tools.py new file mode 100644 index 0000000..3c6a7d7 --- /dev/null +++ b/clients/stratus/tools/prometheus_tools.py @@ -0,0 +1,127 @@ +import logging +from contextlib import AsyncExitStack +from typing import Annotated + +from langchain_core.messages import HumanMessage, SystemMessage, ToolMessage +from langchain_core.tools import InjectedToolCallId, tool +from langgraph.types import Command +from mcp import ClientSession +from mcp.client.sse import sse_client + +from clients.stratus.configs.langgraph_tool_configs import LanggraphToolConfig +from clients.stratus.llm_backend.init_backend import get_llm_backend_for_tools +from clients.stratus.stratus_utils.truncate_by_token import truncate_to_tokens +from clients.stratus.tools.text_editing.flake8_utils import flake8, format_flake8_output # type: ignore +from clients.stratus.tools.text_editing.windowed_file import ( # type: ignore + FileNotOpened, + TextNotFound, + WindowedFile, +) + +logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") +logger = logging.getLogger(__name__) + +langgraph_tool_config = LanggraphToolConfig() + +get_metrics_docstring = """ +Query real-time metrics data from the Prometheus instance. + + Args: + query (str): A Prometheus Query Language (PromQL) expression used to fetch metric values. 
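+            For example (an illustrative PromQL query, not specific to this deployment):
+            sum(rate(container_cpu_usage_seconds_total[5m])) by (pod)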
+ + Returns: + dict: The raw Prometheus response containing metric results, including timestamps, values, and labels. +""" + + +@tool(description=get_metrics_docstring) +async def get_metrics( + query: str, + tool_call_id: Annotated[str, InjectedToolCallId], +) -> Command: + + logger.info(f"get_metrics called with query: {query}") + logger.info("Calling MCP get_metrics from langchain get_metrics") + exit_stack = AsyncExitStack() + logger.info("Using HTTP, connecting to server.") + # server_url = "http://127.0.0.1:9953/sse" + server_url = langgraph_tool_config.prometheus_mcp_url + # Register both the SSE client and session with an async exit stack so they will automatically clean up when + # you're done (e.g. close connections properly + + # opens the actual communication channel to the MCP server + # Connect to the SSE stream + # Wrap that connection in a ClientSession so you can call MCP tools + # Automatically clean up everything when the async block finishes + http_transport = await exit_stack.enter_async_context(sse_client(url=server_url)) + session = await exit_stack.enter_async_context(ClientSession(*http_transport)) + + await session.initialize() + + result = await session.call_tool( + "get_metrics", + arguments={ + "query": query, + }, + ) + result = result.content[0].text + logger.info(f"Result: {result}") + # metrics = result.content[0].text + logger.info(f"Metrics received: {result}") + await exit_stack.aclose() + + # if langgraph_tool_config.use_summaries and len(metrics) >= langgraph_tool_config.min_len_to_sum: + # metrics = _summarize_metrics(result) + # # logger.info(f"Summary: {metrics}") + result = truncate_to_tokens(result) + + return Command( + update={ + "messages": [ + ToolMessage(content=str(result), tool_call_id=tool_call_id), + ] + } + ) + + +def _summarize_metrics(metrics): + logger.info("=== _summarize_metrics called ===") + + system_prompt = """ +You are an expert Site Reliability Engineering tool. You are given raw microservice metrics as JSON dictionaries. + +Your task: + +1. Parse the raw metrics to identify potential root causes for incidents. +2. Summarize the metrics succinctly. +3. Provide raw metrics data as strings (do not explain them generically). +4. Use the following output format STRICTLY: + +SERVICE NAME: +SUMMARY: + + +Example: + +SERVICE NAME: auth-service +SUMMARY: +High CPU usage detected (90%+), memory usage stable. Possible cause: infinite loop in request handler. + +Raw metrics: +{"cpu_usage": "95", "memory_usage": "512MB"} + +If you do not have enough data to determine root cause, state 'Insufficient data to determine root cause' and provide raw metrics. 
+""" + + # logger.info(f"raw metrics received: {metrics}") + llm = get_llm_backend_for_tools() + # then use this `llm` for inference + messages = [ + SystemMessage(content=system_prompt), + HumanMessage(content=metrics.content[0].text), + ] + + metrics_summary = llm.inference(messages=messages) + # metrics_summary = llm.inference(messages=metrics.content[0].text, system_prompt=system_prompt) + logger.info(f"Metrics summary: {metrics_summary}") + return metrics_summary diff --git a/clients/stratus/tools/stateful_async_tool_node.py b/clients/stratus/tools/stateful_async_tool_node.py new file mode 100644 index 0000000..49277f7 --- /dev/null +++ b/clients/stratus/tools/stateful_async_tool_node.py @@ -0,0 +1,36 @@ +import asyncio +import logging + +from langchain_core.tools import BaseTool + +logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") +logger = logging.getLogger(__name__) + + +class StatefulAsyncToolNode: + """A node that runs the stateful remote mcp tools requested in the last AIMessage.""" + + def __init__(self, node_tools: list[BaseTool]) -> None: + self.tools_by_name = {t.name: t for t in node_tools} + + async def __call__(self, inputs: dict): + if messages := inputs.get("messages", []): + message = messages[-1] + else: + raise ValueError("No message found in input") + logger.info(f"StatefulAsyncToolNode: {message}") + outputs = [] + for tool_call in message.tool_calls: + logger.info(f"invoking tool: {tool_call['name']}, tool_call: {tool_call}") + tool_result = await self.tools_by_name[tool_call["name"]].ainvoke( + { + "type": "tool_call", + "name": tool_call["name"], + "args": {"state": inputs, **tool_call["args"]}, + "id": tool_call["id"], + } + ) + logger.info(f"tool_result: {tool_result}") + outputs += tool_result.update["messages"] + + return {"messages": outputs} diff --git a/clients/stratus/tools/stratus_tool_node.py b/clients/stratus/tools/stratus_tool_node.py new file mode 100644 index 0000000..f4b7467 --- /dev/null +++ b/clients/stratus/tools/stratus_tool_node.py @@ -0,0 +1,127 @@ +import asyncio +import logging + +from langchain_core.messages import AIMessage, ToolMessage +from langchain_core.tools import BaseTool +from langgraph.types import Command +from pydantic_core import ValidationError + +logger = logging.getLogger("all.stratus.tool_node") +logger.propagate = True +logger.setLevel(logging.DEBUG) + + +def reschedule_tool_calls(tool_calls): + # reschedule the order of tool_calls + rescheduled_tool_calls = [] + submit_tool_call = [] + wait_tool_call = [] + for tool_call in tool_calls: + if tool_call["name"] == "submit_tool": + submit_tool_call.append(tool_call) + elif tool_call["name"] == "wait_tool": + wait_tool_call.append(tool_call) + else: + rescheduled_tool_calls.append(tool_call) + # submit_tool call is scheduled the first; + # wait_tool call is scheduled the last. 
+ rescheduled_tool_calls = submit_tool_call + rescheduled_tool_calls + wait_tool_call + return rescheduled_tool_calls + + +class StratusToolNode: + """A node that runs the tools requested in the last AIMessage.""" + + def __init__(self, sync_tools: list[BaseTool], async_tools: list[BaseTool]) -> None: + self.sync_tools_by_name = {t.name: t for t in sync_tools} if sync_tools is not None else None + self.async_tools_by_name = {t.name: t for t in async_tools} if async_tools is not None else None + + async def __call__(self, inputs: dict): + if messages := inputs.get("messages", []): + message = messages[-1] + else: + raise ValueError("No message found in input") + + if not isinstance(message, AIMessage): + logger.warning( + f"Expected last message to be an AIMessage, but got {type(message)}.\n" f"{inputs.get('messages', [])}" + ) + raise ValueError("Last message is not an AIMessage; skipping tool invocation.") + + arena_logger = logging.getLogger("sregym-global") + if message.content != "": + arena_logger.info(f"[LLM] {message.content}") + # logger.info(f"{message.content}") + + if not getattr(message, "tool_calls", None): + logger.warning("AIMessage does not contain tool_calls.") + return {"messages": []} + + if len(message.tool_calls) > 1: + logger.warning("more than 1 tool call found. Calling in order", extra={"Tool Calls": message.tool_calls}) + logger.warning("technically, only one tool call allowed") + + to_update = dict() + new_messages = [] + for i, tool_call in enumerate(message.tool_calls): + try: + # logger.info(f"[STRATUS_TOOLNODE] invoking tool: {tool_call['name']}, tool_call: {tool_call}") + arg_list = [f"{key} = {value}" for key, value in tool_call["args"].items()] + arena_logger.info(f"[LLM] Agent choose to call: {tool_call['name']}({', '.join(arg_list)})") + logger.info(f"[STRATUS_TOOLNODE] Agent choose to call: {tool_call['name']}({', '.join(arg_list)})") + if tool_call["name"] in self.async_tools_by_name: + tool_result = await self.async_tools_by_name[tool_call["name"]].ainvoke( + { + "type": "tool_call", + "name": tool_call["name"], + "args": {"state": inputs, **tool_call["args"]}, + "id": tool_call["id"], + } + ) + elif tool_call["name"] in self.sync_tools_by_name: + tool_result = self.sync_tools_by_name[tool_call["name"]].invoke( + { + "type": "tool_call", + "name": tool_call["name"], + "args": {"state": inputs, **tool_call["args"]}, + "id": tool_call["id"], + } + ) + else: + logger.info(f"agent tries to call tool that DNE: {tool_call['name']}") + Command( + update={ + "messages": [ + ToolMessage( + content=f"Tool {tool_call['name']} does not exist!", + tool_call_id=tool_call["id"], + ) + ] + } + ) + + assert isinstance( + tool_result, Command + ), f"Tool {tool_call['name']} should return a Command object, but return {type(tool_result)}" + logger.debug(f"[STRATUS_TOOLNODE] tool_result: {tool_result}") + if tool_result.update["messages"]: + combined_content = "\n".join([message.content for message in tool_result.update["messages"]]) + arena_logger.info(f"[ENV] Tool {tool_call['name']} returned: \n {combined_content}") + new_messages += tool_result.update["messages"] + to_update = { + **to_update, + **tool_result.update, # this is the key part + } + except ValidationError as e: + logger.error(f"tool_call: {tool_call}\nError: {e}") + arena_logger.error(f"[ENV] Tool Call {tool_call['name']} failed: \n {e}") + new_messages += [ + ToolMessage( + content=f"Error: {e}; This happens usually because you are " + f"passing inappropriate arguments to the tool.", + 
tool_call_id=tool_call["id"], + ) + ] + + to_update["messages"] = new_messages + return to_update diff --git a/clients/stratus/tools/submit_tool.py b/clients/stratus/tools/submit_tool.py new file mode 100644 index 0000000..9d27480 --- /dev/null +++ b/clients/stratus/tools/submit_tool.py @@ -0,0 +1,127 @@ +import ast +import logging +from contextlib import AsyncExitStack +from typing import Annotated + +from langchain_core.messages import ToolMessage +from langchain_core.tools import InjectedToolCallId, tool +from langgraph.prebuilt import InjectedState +from langgraph.types import Command +from mcp import ClientSession +from mcp.client.sse import sse_client + +from clients.stratus.configs.langgraph_tool_configs import LanggraphToolConfig +from clients.stratus.stratus_agent.state import State + +submit_tool_docstring = """ +Use this tool to submit your answer to the assigned tasks. You can give partial answer or empty answer + (still of type dict) if you can not solve all of them. + + Args: + ans (string): the answer you would like to submit +""" + +rollback_submit_tool_docstring = """ +The tool to submit after you rolled back all the changes. +""" +logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") +logger = logging.getLogger(__name__) + +langgraph_tool_config = LanggraphToolConfig() + + +@tool(description=submit_tool_docstring) +async def submit_tool( + ans: str, state: Annotated[State, InjectedState], tool_call_id: Annotated[str, InjectedToolCallId] +) -> Command: + # makes http call to benchmark submission server + logging.info(f"submitting to benchmark, answer: {ans}") + + exit_stack = AsyncExitStack() + logger.info("Using HTTP, connecting to server.") + server_url = langgraph_tool_config.submit_mcp_url + http_transport = await exit_stack.enter_async_context(sse_client(url=server_url)) + session = await exit_stack.enter_async_context(ClientSession(*http_transport)) + + await session.initialize() + + result = await session.call_tool( + "submit", + arguments={ + "ans": ans, + }, + ) + result = result.content[0].text + result = ast.literal_eval(result) + + await exit_stack.aclose() + if result["status"] != "200": + logger.info(f"HTTP submission failed: {result}") + logger.info("we don't set submitted to True, to force agent retry submission. \n") + logger.info("giving agent another change by decrementing step count") + return Command( + update={ + "num_steps": state["num_steps"] - 1, + "messages": [ + ToolMessage(content=f"HTTP submission failed: {result}", tool_call_id=tool_call_id), + ], + } + ) + logger.info("submission succeeded.") + return Command( + update={ + "submitted": True, + "messages": [ToolMessage(f"Submission complete. No further action is needed.", tool_call_id=tool_call_id)], + } + ) + + +@tool("f_submit_tool", description=submit_tool_docstring) +async def fake_submit_tool(ans: str, tool_call_id: Annotated[str, InjectedToolCallId]) -> Command: + # makes http call to benchmark submission server + logging.info(f"_NOT_ submitting to benchmark, answer: {ans}") + logger.info(f"This method is to only change the state[submitted] value.") + logger.info(f"mitigation submission is done out side of agent logic, for retry") + + return Command( + update={ + "submitted": True, + "messages": [ToolMessage(f"Submission complete. 
No further action is needed.", tool_call_id=tool_call_id)], + } + ) + + +@tool("r_submit_tool", description=rollback_submit_tool_docstring) +async def rollback_submit_tool(tool_call_id: Annotated[str, InjectedToolCallId]) -> Command: + logger.info("rollback agent submits") + logger.info(f"This method is to only change the state[submitted] value.") + + return Command( + update={ + "submitted": True, + "messages": [ToolMessage(f"Submission complete. No further action is needed.", tool_call_id=tool_call_id)], + } + ) + + +async def manual_submit_tool(ans: str) -> str: + # makes http call to benchmark submission server + logging.info(f"_manually_ submitting to benchmark, answer: {ans}") + + exit_stack = AsyncExitStack() + logger.info("Using HTTP, connecting to server.") + server_url = langgraph_tool_config.submit_mcp_url + http_transport = await exit_stack.enter_async_context(sse_client(url=server_url)) + session = await exit_stack.enter_async_context(ClientSession(*http_transport)) + + await session.initialize() + + result = await session.call_tool( + "submit", + arguments={ + "ans": ans, + }, + ) + await exit_stack.aclose() + logger.info("Submission complete. No further action is needed.") + return "Submitted" diff --git a/clients/stratus/tools/text_editing/__init__.py b/clients/stratus/tools/text_editing/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/clients/stratus/tools/text_editing/edit.py b/clients/stratus/tools/text_editing/edit.py new file mode 100644 index 0000000..700a17d --- /dev/null +++ b/clients/stratus/tools/text_editing/edit.py @@ -0,0 +1,208 @@ +#!/usr/bin/env python3 + +import argparse +from typing import Annotated + +from langchain_core.tools import InjectedToolCallId, tool +from langgraph.prebuilt import InjectedState +from langgraph.types import Command + +from clients.stratus.tools.text_editing.file_manip import update_file_vars_in_state + +try: + from sweagent import TOOLS_DIR +except ImportError: + pass +else: + import sys + + default_lib = TOOLS_DIR / "defaults" / "lib" + assert default_lib.is_dir() + sys.path.append(str(default_lib)) + sys.path.append(str(TOOLS_DIR / "registry" / "lib")) + +from flake8_utils import flake8, format_flake8_output # type: ignore +from windowed_file import FileNotOpened, TextNotFound, WindowedFile # type: ignore + +RETRY_WITH_OUTPUT_TOKEN = "###SWE-AGENT-RETRY-WITH-OUTPUT###" + +_NOT_FOUND = """Your edit was not applied (file not modified): Text {search!r} not found in displayed lines (or anywhere in the file). +Please modify your search string. Did you forget to properly handle whitespace/indentation? +You can also call `open` again to re-display the file with the correct context. +""" + +_NOT_FOUND_IN_WINDOW_MSG = """Your edit was not applied (file not modified): Text {search!r} not found in displayed lines. + +However, we found the following occurrences of your search string in the file: + +{occurrences} + +You can use the `goto` command to navigate to these locations before running the edit command again. +""" + +_MULTIPLE_OCCURRENCES_MSG = """Your edit was not applied (file not modified): Found more than one occurrence of {search!r} in the currently displayed lines. +Please make your search string more specific (for example, by including more lines of context). +""" + +_NO_CHANGES_MADE_MSG = """Your search and replace strings are the same. No changes were made. Please modify your search or replace strings.""" + +_SINGLE_EDIT_SUCCESS_MSG = """Text replaced. 
Please review the changes and make sure they are correct: + +1. The edited file is correctly indented +2. The edited file does not contain duplicate lines +3. The edit does not break existing functionality + +Edit the file again if necessary.""" + +_MULTIPLE_EDITS_SUCCESS_MSG = """Replaced {n_replacements} occurrences. Please review the changes and make sure they are correct: + +1. The edited file is correctly indented +2. The edited file does not contain duplicate lines +3. The edit does not break existing functionality + +Edit the file again if necessary.""" + +_LINT_ERROR_TEMPLATE = """Your proposed edit has introduced new syntax error(s). Please read this error message carefully and then retry editing the file. + +ERRORS: + +{errors} + +This is how your edit would have looked if applied +------------------------------------------------ +{window_applied} +------------------------------------------------ + +This is the original code before your edit +------------------------------------------------ +{window_original} +------------------------------------------------ + +Your changes have NOT been applied. Please fix your edit command and try again. +DO NOT re-run the same failed edit command. Running it again will lead to the same error. +""" + + +def get_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser() + parser.add_argument("search", type=str) + parser.add_argument("replace", type=str) + parser.add_argument("replace_all", type=bool, nargs="?", default=False) + return parser + + +@tool("edit") +def edit( + state: Annotated[dict, InjectedState] = None, + tool_call_id: Annotated[str, InjectedToolCallId] = "", + search: str = "", + replace: str = "", + replace_all: bool = "", +) -> Command: + """ + Replace first occurrence of with in the currently displayed lines. + If replace-all is True , replace all occurrences of with . + + For example, if you are looking at this file: + + def fct(): + print("Hello world") + + and you want to edit the file to read: + + def fct(): + print("Hello") + print("world") + + you can search for `Hello world` and replace with `"Hello"\n print("world")` + (note the extra spaces before the print statement!). + + Tips: + + 1. Always include proper whitespace/indentation + 2. When you are adding an if/with/try statement, you need to INDENT the block that follows, so make sure to include it in both your search and replace strings! + 3. If you are wrapping code in a try statement, make sure to also add an 'except' or 'finally' block. + + Before every edit, please + + 1. Explain the code you want to edit and why it is causing the problem + 2. Explain the edit you want to make and how it fixes the problem + 3. Explain how the edit does not break existing functionality + """ + if not isinstance(state["curr_file"], str): + logger.error("INTERNAL: state curr file should be a string") + exit(1) + if len(state["curr_file"]) == 0: + msg_txt = "No file opened. Either `open` or `create` a file first." 
+ return Command(update=update_file_vars_in_state(state, msg_txt, tool_call_id)) + + wf = WindowedFile(path=state["curr_file"]) + + # Turn \\n into \n etc., i.e., undo the escaping + # args.replace = args.replace.encode("utf8").decode("unicode_escape") + + if search == replace: + msg_txt = _NO_CHANGES_MADE_MSG + "\n" + RETRY_WITH_OUTPUT_TOKEN + return Command(update=update_file_vars_in_state(state, msg_txt, tool_call_id)) + + pre_edit_lint = flake8(wf.path) + + try: + if not replace_all: + window_text = wf.get_window_text() + if window_text.count(search) > 1: + msg_txt = _MULTIPLE_OCCURRENCES_MSG.format(search=search) + "\n" + RETRY_WITH_OUTPUT_TOKEN + return Command(update=update_file_vars_in_state(state, msg_txt, tool_call_id)) + replacement_info = wf.replace_in_window(search, replace) + # todo: Should warn if more than one occurrence was found? + else: + # todo: Give overview of all replaced occurrences/number of replacements + replacement_info = wf.replace(search, replace) + except TextNotFound: + line_no_founds = wf.find_all_occurrences(search, zero_based=False) + if line_no_founds: + msg_txt = _NOT_FOUND_IN_WINDOW_MSG.format( + search=search, occurrences="\n".join([f"- line {line_no}" for line_no in line_no_founds]) + ) + else: + msg_txt = _NOT_FOUND.format(search=search) + msg_txt = msg_txt + "\n" + RETRY_WITH_OUTPUT_TOKEN + return Command(update=update_file_vars_in_state(state, msg_txt, tool_call_id)) + + post_edit_lint = flake8(wf.path) + + if not replace_all: + # Try to filter out pre-existing errors + replacement_window = ( + replacement_info.first_replaced_line, + replacement_info.first_replaced_line + replacement_info.n_search_lines - 1, + ) + new_flake8_output = format_flake8_output( + post_edit_lint, + previous_errors_string=pre_edit_lint, + replacement_window=replacement_window, + replacement_n_lines=replacement_info.n_replace_lines, + ) + else: + # Cannot easily compare the error strings, because line number changes are hard to keep track of + # So we show all linter errors. 
+ new_flake8_output = format_flake8_output(post_edit_lint) + + if new_flake8_output: + with_edits = wf.get_window_text(line_numbers=True, status_line=True, pre_post_line=True) + wf.undo_edit() + without_edits = wf.get_window_text(line_numbers=True, status_line=True, pre_post_line=True) + msg_txt = _LINT_ERROR_TEMPLATE.format( + errors=new_flake8_output, + window_applied=with_edits, + window_original=without_edits, + ) + msg_txt = msg_txt + "\n" + RETRY_WITH_OUTPUT_TOKEN + return Command(update=update_file_vars_in_state(state, msg_txt, tool_call_id)) + if not replace_all: + msg_txt = _SINGLE_EDIT_SUCCESS_MSG + else: + msg_txt = _MULTIPLE_EDITS_SUCCESS_MSG.format(n_replacements=replacement_info.n_replacements) + + msg_txt = msg_txt + "\n\n" + wf.get_window_text(line_numbers=True, status_line=True, pre_post_line=True) + return Command(update=update_file_vars_in_state(state, msg_txt, tool_call_id)) diff --git a/clients/stratus/tools/text_editing/file_manip.py b/clients/stratus/tools/text_editing/file_manip.py new file mode 100644 index 0000000..60bc28f --- /dev/null +++ b/clients/stratus/tools/text_editing/file_manip.py @@ -0,0 +1,415 @@ +import logging +import os.path +from pathlib import Path +from typing import Annotated, Optional, Union + +from langchain_core.messages import AIMessage, HumanMessage, ToolMessage +from langchain_core.tools import InjectedToolCallId, tool +from langgraph.prebuilt import InjectedState +from langgraph.types import Command + +from clients.stratus.stratus_agent.state import State +from clients.stratus.tools.text_editing.flake8_utils import flake8, format_flake8_output # type: ignore +from clients.stratus.tools.text_editing.windowed_file import ( # type: ignore + FileNotOpened, + TextNotFound, + WindowedFile, +) + +logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") +logger = logging.getLogger(__name__) + + +def update_file_vars_in_state( + state: State, + message: str | ToolMessage | AIMessage | HumanMessage, + tool_call_id: Annotated[str, InjectedToolCallId] = "", +) -> State: + logger.info("updating state with message: %s", message) + logger.info(f"state: {state}, tool_call_id: {tool_call_id}") + new_state = state + + match message: + case str(): + logger.info("Not updating state as message is a string") + new_state["messages"] = new_state["messages"] + [ToolMessage(content=message, tool_call_id=tool_call_id)] + case ToolMessage(): + logger.info("Trying to update states with message as ToolMessage") + tool_call_msg = "" + for i in range(len(new_state["messages"]) - 1, -1, -1): + if hasattr(new_state["messages"][i], "tool_calls") and len(new_state["messages"][i].tool_calls) > 0: + tool_call_msg = new_state["messages"][i] + logger.info("Found last tool call message: %s", tool_call_msg) + break + tool_name = tool_call_msg.tool_calls[0]["name"] + tool_args = tool_call_msg.tool_calls[0]["args"] + logger.info("Found tool args: %s", tool_args) + if tool_name == "open_file": + new_state["curr_file"] = tool_args["path"] + new_state["curr_line"] = tool_args["line_number"] + new_state["workdir"] = str(Path(tool_args["path"]).parent) + elif tool_name == "goto_line": + new_state["curr_line"] = tool_args["line_number"] + elif tool_name == "create": + new_state["curr_file"] = tool_args["path"] + new_state["workdir"] = str(Path(tool_args["path"]).parent) + elif tool_name == "edit": + # Explicitly pointing out as this tool does not modify agent state + pass + elif tool_name == "insert": + # Explicitly pointing out as this tool 
does not modify agent state + pass + + new_state["messages"] = new_state["messages"] + [message] + case _: + logger.info("Not found open_file or goto_line in message: %s", message) + logger.info("Not updating state") + logger.info("Updated state: %s", new_state) + return new_state + + +@tool("open_file", description="open a file, path: , line_number: ") +def open_file( + state: Annotated[dict, InjectedState] = None, + tool_call_id: Annotated[str, InjectedToolCallId] = "", + path: Optional[str] = None, + line_number: Optional[str] = None, +) -> Command: + logger.info("in open_file, the last msg: %s", state["messages"][-1]) + if path is None: + msg_txt = 'Usage: open "" []' + return Command( + update=update_file_vars_in_state(state, msg_txt, tool_call_id), + ) + + if not os.path.exists(path): + msg_txt = f"Error: File '{path}' does not exist." + return Command( + update=update_file_vars_in_state(state, msg_txt, tool_call_id), + ) + + wf = WindowedFile(path=Path(path), exit_on_exception=False) + + if line_number is not None: + try: + line_num = int(line_number) + except ValueError: + msg_txt = 'Usage: open "" []' + "Error: must be a number" + return Command( + update=update_file_vars_in_state(state, msg_txt, tool_call_id), + ) + if line_num > wf.n_lines: + msg_txt = ( + f"Warning: ({line_num}) is greater than the number of lines in the file ({wf.n_lines})" + + f"Warning: Setting to {wf.n_lines}" + ) + line_num = wf.n_lines + return Command( + update=update_file_vars_in_state(state, msg_txt, tool_call_id), + ) + elif line_num < 1: + msg_txt = f"Warning: ({line_num}) is less than 1" + "Warning: Setting to 1" + line_num = 1 + return Command( + update=update_file_vars_in_state(state, msg_txt, tool_call_id), + ) + else: + # Default to middle of window if no line number provided + line_num = wf.first_line + + wf.goto(line_num - 1, mode="top") + msg_txt = wf.get_window_text(line_numbers=True, status_line=True, pre_post_line=True) + return Command( + update=update_file_vars_in_state( + state, + ToolMessage(content=msg_txt, tool_call_id=tool_call_id), + ), + ) + + +@tool("goto_line", description="goto a line in an opened file, line_number: ") +def goto_line( + state: Annotated[dict, InjectedState], + tool_call_id: Annotated[str, InjectedToolCallId], + line_number: Optional[int] = None, +) -> Command: + if state["curr_file"] == "": + msg_txt = "Error: No file is open, use open_file to open a file first" + return Command( + update=update_file_vars_in_state(state, msg_txt, tool_call_id), + ) + + if line_number is None: + msg_txt = "Usage: goto " + return Command( + update=update_file_vars_in_state(state, msg_txt, tool_call_id), + ) + + try: + line_number = int(line_number) + except ValueError: + msg_txt = "Usage: goto \n" + "Error: must be a number" + return Command( + update=update_file_vars_in_state(state, msg_txt, tool_call_id), + ) + + curr_file = str(state["curr_file"]) + wf = WindowedFile(curr_file) + + if line_number > wf.n_lines: + msg_txt = f"Error: must be less than or equal to {wf.n_lines}" + return Command( + update=update_file_vars_in_state(state, msg_txt, tool_call_id), + ) + + # Convert from 1-based line numbers (user input) to 0-based (internal representation) + wf.goto(line_number - 1, mode="top") + wf.print_window() + msg_txt = wf.get_window_text(line_numbers=True, status_line=True, pre_post_line=True) + return Command( + update=update_file_vars_in_state( + state, + ToolMessage(content=msg_txt, tool_call_id=tool_call_id), + ) + ) + + +@tool("create", description="Create a new file. 
path: ") +def create(state: Annotated[dict, InjectedState], tool_call_id: Annotated[str, InjectedToolCallId], path: str): + path = Path(path) + if path.exists(): + msg_txt = f"Warning: File '{path}' already exists." + return Command( + update=update_file_vars_in_state( + state, + ToolMessage(content=msg_txt, tool_call_id=tool_call_id), + ) + ) + + path.write_text("\n") + + wf = WindowedFile(path=path) + wf.first_line = 0 + wf.print_window() + msg_txt = "File created successfully." + return Command( + update=update_file_vars_in_state( + state, + ToolMessage(content=msg_txt, tool_call_id=tool_call_id), + ) + ) + + +_NOT_FOUND = """Your edit was not applied (file not modified): Text {search!r} not found in displayed lines (or anywhere in the file). +Please modify your search string. Did you forget to properly handle whitespace/indentation? +You can also call `open` again to re-display the file with the correct context. +""" + +_NOT_FOUND_IN_WINDOW_MSG = """Your edit was not applied (file not modified): Text {search!r} not found in displayed lines. + +However, we found the following occurrences of your search string in the file: + +{occurrences} + +You can use the `goto` command to navigate to these locations before running the edit command again. +""" + +_MULTIPLE_OCCURRENCES_MSG = """Your edit was not applied (file not modified): Found more than one occurrence of {search!r} in the currently displayed lines. +Please make your search string more specific (for example, by including more lines of context). +""" + +_NO_CHANGES_MADE_MSG = """Your search and replace strings are the same. No changes were made. Please modify your search or replace strings.""" + +_SINGLE_EDIT_SUCCESS_MSG = """Text replaced. Please review the changes and make sure they are correct: + +1. The edited file is correctly indented +2. The edited file does not contain duplicate lines +3. The edit does not break existing functionality + +Edit the file again if necessary.""" + +_MULTIPLE_EDITS_SUCCESS_MSG = """Replaced {n_replacements} occurrences. Please review the changes and make sure they are correct: + +1. The edited file is correctly indented +2. The edited file does not contain duplicate lines +3. The edit does not break existing functionality + +Edit the file again if necessary.""" + +_LINT_ERROR_TEMPLATE = """Your proposed edit has introduced new syntax error(s). Please read this error message carefully and then retry editing the file. + +ERRORS: + +{errors} + +This is how your edit would have looked if applied +------------------------------------------------ +{window_applied} +------------------------------------------------ + +This is the original code before your edit +------------------------------------------------ +{window_original} +------------------------------------------------ + +Your changes have NOT been applied. Please fix your edit command and try again. +DO NOT re-run the same failed edit command. Running it again will lead to the same error. +""" + + +@tool("edit") +def edit( + state: Annotated[dict, InjectedState] = None, + tool_call_id: Annotated[str, InjectedToolCallId] = "", + search: str = "", + replace: str = "", + replace_all: bool = "", +) -> Command: + """ + Replace first occurrence of with in the currently displayed lines. + If replace-all is True , replace all occurrences of with . 
+ + For example, if you are looking at this file: + + def fct(): + print("Hello world") + + and you want to edit the file to read: + + def fct(): + print("Hello") + print("world") + + you can search for `Hello world` and replace with `"Hello"\n print("world")` + (note the extra spaces before the print statement!). + + Tips: + + 1. Always include proper whitespace/indentation + 2. When you are adding an if/with/try statement, you need to INDENT the block that follows, so make sure to include it in both your search and replace strings! + 3. If you are wrapping code in a try statement, make sure to also add an 'except' or 'finally' block. + + Before every edit, please + + 1. Explain the code you want to edit and why it is causing the problem + 2. Explain the edit you want to make and how it fixes the problem + 3. Explain how the edit does not break existing functionality + """ + if not isinstance(state["curr_file"], str): + logger.error("INTERNAL: state curr file should be a string") + exit(1) + if len(state["curr_file"]) == 0: + msg_txt = "No file opened. Either `open` or `create` a file first." + return Command(update=update_file_vars_in_state(state, msg_txt, tool_call_id)) + + wf = WindowedFile(path=state["curr_file"]) + + # Turn \\n into \n etc., i.e., undo the escaping + # args.replace = args.replace.encode("utf8").decode("unicode_escape") + + if search == replace: + msg_txt = _NO_CHANGES_MADE_MSG + return Command(update=update_file_vars_in_state(state, msg_txt, tool_call_id)) + + pre_edit_lint = flake8(wf.path) + + try: + if not replace_all: + window_text = wf.get_window_text() + if window_text.count(search) > 1: + msg_txt = _MULTIPLE_OCCURRENCES_MSG.format(search=search) + return Command(update=update_file_vars_in_state(state, msg_txt, tool_call_id)) + replacement_info = wf.replace_in_window(search, replace) + # todo: Should warn if more than one occurrence was found? + else: + # todo: Give overview of all replaced occurrences/number of replacements + replacement_info = wf.replace(search, replace) + except TextNotFound: + line_no_founds = wf.find_all_occurrences(search, zero_based=False) + if line_no_founds: + msg_txt = _NOT_FOUND_IN_WINDOW_MSG.format( + search=search, occurrences="\n".join([f"- line {line_no}" for line_no in line_no_founds]) + ) + else: + msg_txt = _NOT_FOUND.format(search=search) + msg_txt = msg_txt + return Command(update=update_file_vars_in_state(state, msg_txt, tool_call_id)) + + post_edit_lint = flake8(wf.path) + + if not replace_all: + # Try to filter out pre-existing errors + replacement_window = ( + replacement_info.first_replaced_line, + replacement_info.first_replaced_line + replacement_info.n_search_lines - 1, + ) + new_flake8_output = format_flake8_output( + post_edit_lint, + previous_errors_string=pre_edit_lint, + replacement_window=replacement_window, + replacement_n_lines=replacement_info.n_replace_lines, + ) + else: + # Cannot easily compare the error strings, because line number changes are hard to keep track of + # So we show all linter errors. 
+ new_flake8_output = format_flake8_output(post_edit_lint) + + if new_flake8_output: + with_edits = wf.get_window_text(line_numbers=True, status_line=True, pre_post_line=True) + wf.undo_edit() + without_edits = wf.get_window_text(line_numbers=True, status_line=True, pre_post_line=True) + msg_txt = _LINT_ERROR_TEMPLATE.format( + errors=new_flake8_output, + window_applied=with_edits, + window_original=without_edits, + ) + msg_txt = msg_txt + return Command(update=update_file_vars_in_state(state, msg_txt, tool_call_id)) + if not replace_all: + msg_txt = _SINGLE_EDIT_SUCCESS_MSG + else: + msg_txt = _MULTIPLE_EDITS_SUCCESS_MSG.format(n_replacements=replacement_info.n_replacements) + + msg_txt = msg_txt + "\n\n" + wf.get_window_text(line_numbers=True, status_line=True, pre_post_line=True) + return Command(update=update_file_vars_in_state(state, msg_txt, tool_call_id)) + + +@tool("insert") +def insert( + state: Annotated[dict, InjectedState], + tool_call_id: Annotated[str, InjectedToolCallId], + text: str, + line_number: Union[int, None] = None, +): + """ + Insert at the end of the currently opened file or after if specified. + """ + if len(state["curr_file"]) == 0: + msg_txt = "No file opened. Either `open` or `create` a file first." + return Command(update=update_file_vars_in_state(state, msg_txt, tool_call_id)) + wf = WindowedFile(state["curr_file"]) + + pre_edit_lint = flake8(wf.path) + insert_info = wf.insert(text, line=line_number - 1 if line_number is not None else None) + post_edit_lint = flake8(wf.path) + + # Try to filter out pre-existing errors + replacement_window = (insert_info.first_inserted_line, insert_info.first_inserted_line) + new_flake8_output = format_flake8_output( + post_edit_lint, + previous_errors_string=pre_edit_lint, + replacement_window=replacement_window, + replacement_n_lines=insert_info.n_lines_added, + ) + + if new_flake8_output: + with_edits = wf.get_window_text(line_numbers=True, status_line=True, pre_post_line=True) + wf.undo_edit() + without_edits = wf.get_window_text(line_numbers=True, status_line=True, pre_post_line=True) + msg_txt = _LINT_ERROR_TEMPLATE.format( + errors=new_flake8_output, window_applied=with_edits, window_original=without_edits + ) + return Command(update=update_file_vars_in_state(state, msg_txt, tool_call_id)) + + msg_txt = wf.get_window_text(line_numbers=True, status_line=True, pre_post_line=True) + return Command(update=update_file_vars_in_state(state, msg_txt, tool_call_id)) diff --git a/clients/stratus/tools/text_editing/flake8_utils.py b/clients/stratus/tools/text_editing/flake8_utils.py new file mode 100644 index 0000000..25a2b68 --- /dev/null +++ b/clients/stratus/tools/text_editing/flake8_utils.py @@ -0,0 +1,142 @@ +#!/usr/bin/env python3 + +"""This helper command is used to parse and print flake8 output.""" + +# ruff: noqa: UP007 UP006 UP035 + +import subprocess +from pathlib import Path +from typing import List, Optional, Tuple + +try: + from sweagent import TOOLS_DIR +except ImportError: + pass +else: + import sys + + default_lib = TOOLS_DIR / "text_editing" / "lib" + assert default_lib.is_dir() + sys.path.append(str(default_lib)) + sys.path.append(str(TOOLS_DIR / "registry" / "lib")) + + +class Flake8Error: + """A class to represent a single flake8 error""" + + def __init__(self, filename: str, line_number: int, col_number: int, problem: str): + self.filename = filename + self.line_number = line_number + self.col_number = col_number + self.problem = problem + + @classmethod + def from_line(cls, line: str): + try: + prefix, _sep, 
problem = line.partition(": ") + filename, line_number, col_number = prefix.split(":") + except (ValueError, IndexError) as e: + msg = f"Invalid flake8 error line: {line}" + raise ValueError(msg) from e + return cls(filename, int(line_number), int(col_number), problem) + + def __eq__(self, other): + if not isinstance(other, Flake8Error): + return NotImplemented + return ( + self.filename == other.filename + and self.line_number == other.line_number + and self.col_number == other.col_number + and self.problem == other.problem + ) + + def __repr__(self): + return f"Flake8Error(filename={self.filename}, line_number={self.line_number}, col_number={self.col_number}, problem={self.problem})" + + +def _update_previous_errors( + previous_errors: List[Flake8Error], replacement_window: Tuple[int, int], replacement_n_lines: int +) -> List[Flake8Error]: + """Update the line numbers of the previous errors to what they would be after the edit window. + This is a helper function for `_filter_previous_errors`. + + All previous errors that are inside of the edit window should not be ignored, + so they are removed from the previous errors list. + + Args: + previous_errors: list of errors with old line numbers + replacement_window: the window of the edit/lines that will be replaced + replacement_n_lines: the number of lines that will be used to replace the text + + Returns: + list of errors with updated line numbers + """ + updated = [] + lines_added = replacement_n_lines - (replacement_window[1] - replacement_window[0] + 1) + for error in previous_errors: + if error.line_number < replacement_window[0]: + # no need to adjust the line number + updated.append(error) + continue + if replacement_window[0] <= error.line_number <= replacement_window[1]: + # The error is within the edit window, so let's not ignore it + # either way (we wouldn't know how to adjust the line number anyway) + continue + # We're out of the edit window, so we need to adjust the line number + updated.append(Flake8Error(error.filename, error.line_number + lines_added, error.col_number, error.problem)) + return updated + + +def format_flake8_output( + input_string: str, + show_line_numbers: bool = False, + *, + previous_errors_string: str = "", + replacement_window: Optional[Tuple[int, int]] = None, + replacement_n_lines: Optional[int] = None, +) -> str: + """Filter flake8 output for previous errors and print it for a given file. 
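+    Each input line is expected in flake8's default format, e.g.
+    "path/to/file.py:12:5: F821 undefined name 'foo'" (illustrative example).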
+ + Args: + input_string: The flake8 output as a string + show_line_numbers: Whether to show line numbers in the output + previous_errors_string: The previous errors as a string + replacement_window: The window of the edit (lines that will be replaced) + replacement_n_lines: The number of lines used to replace the text + + Returns: + The filtered flake8 output as a string + """ + errors = [Flake8Error.from_line(line.strip()) for line in input_string.split("\n") if line.strip()] + # print(f"New errors before filtering: {errors=}") + lines = [] + if previous_errors_string: + assert replacement_window is not None + assert replacement_n_lines is not None + previous_errors = [ + Flake8Error.from_line(line.strip()) for line in previous_errors_string.split("\n") if line.strip() + ] + # print(f"Previous errors before updating: {previous_errors=}") + previous_errors = _update_previous_errors(previous_errors, replacement_window, replacement_n_lines) + # print(f"Previous errors after updating: {previous_errors=}") + errors = [error for error in errors if error not in previous_errors] + # Sometimes new errors appear above the replacement window that were 'shadowed' by the previous errors + # they still clearly aren't caused by the edit. + errors = [error for error in errors if error.line_number >= replacement_window[0]] + # print(f"New errors after filtering: {errors=}") + for error in errors: + if not show_line_numbers: + lines.append(f"- {error.problem}") + else: + lines.append(f"- line {error.line_number} col {error.col_number}: {error.problem}") + return "\n".join(lines) + + +def flake8(file_path: str) -> str: + """Run flake8 on a given file and return the output as a string""" + if Path(file_path).suffix != ".py": + return "" + cmd = "flake8 --isolated --select=F821,F822,F831,E111,E112,E113,E999,E902 {file_path}" + # don't use capture_output because it's not compatible with python3.6 + out = subprocess.run(cmd.format(file_path=file_path), shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + return out.stdout.decode() diff --git a/clients/stratus/tools/text_editing/insert.py b/clients/stratus/tools/text_editing/insert.py new file mode 100644 index 0000000..76e2ef3 --- /dev/null +++ b/clients/stratus/tools/text_editing/insert.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python3 + +import argparse +from typing import Annotated, Union + +from langchain_core.tools import InjectedToolCallId, tool +from langgraph.prebuilt import InjectedState +from langgraph.types import Command + +from clients.stratus.tools.text_editing.file_manip import update_file_vars_in_state + +RETRY_WITH_OUTPUT_TOKEN = "###SWE-AGENT-RETRY-WITH-OUTPUT###" + +_LINT_ERROR_TEMPLATE = """Your proposed edit has introduced new syntax error(s). +Please read this error message carefully and then retry editing the file. + +ERRORS: + +{errors} + +This is how your edit would have looked if applied +------------------------------------------------ +{window_applied} +------------------------------------------------ + +This is the original code before your edit +------------------------------------------------ +{window_original} +------------------------------------------------ + +Your changes have NOT been applied. Please fix your edit command and try again. +DO NOT re-run the same failed edit command. Running it again will lead to the same error. 
+""" + + +def get_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser() + parser.add_argument("text", type=str) + parser.add_argument("line", type=int, nargs="?", default=None) + return parser + + +@tool("insert") +def insert( + state: Annotated[dict, InjectedState], + tool_call_id: Annotated[str, InjectedToolCallId], + text: str, + line: Union[int, None] = None, +): + """ + Insert at the end of the currently opened file or after if specified. + """ + if len(state["curr_file"]) == 0: + msg_txt = "No file opened. Either `open` or `create` a file first." + return Command(update=update_file_vars_in_state(state, msg_txt, tool_call_id)) + wf = WindowedFile(state["curr_file"]) + + pre_edit_lint = flake8(wf.path) + insert_info = wf.insert(text, line=line - 1 if line is not None else None) + post_edit_lint = flake8(wf.path) + + # Try to filter out pre-existing errors + replacement_window = (insert_info.first_inserted_line, insert_info.first_inserted_line) + new_flake8_output = format_flake8_output( + post_edit_lint, + previous_errors_string=pre_edit_lint, + replacement_window=replacement_window, + replacement_n_lines=insert_info.n_lines_added, + ) + + if new_flake8_output: + with_edits = wf.get_window_text(line_numbers=True, status_line=True, pre_post_line=True) + wf.undo_edit() + without_edits = wf.get_window_text(line_numbers=True, status_line=True, pre_post_line=True) + msg_txt = _LINT_ERROR_TEMPLATE.format( + errors=new_flake8_output, window_applied=with_edits, window_original=without_edits + ) + return Command(update=update_file_vars_in_state(state, msg_txt, tool_call_id)) + + msg_txt = wf.get_window_text(line_numbers=True, status_line=True, pre_post_line=True) + return Command(update=update_file_vars_in_state(state, msg_txt, tool_call_id)) diff --git a/clients/stratus/tools/text_editing/windowed_file.py b/clients/stratus/tools/text_editing/windowed_file.py new file mode 100644 index 0000000..b29b12a --- /dev/null +++ b/clients/stratus/tools/text_editing/windowed_file.py @@ -0,0 +1,303 @@ +import json +import os +from pathlib import Path +from typing import Any, List, Optional, Tuple, Union + + +class FileNotOpened(Exception): + """Raised when no file is opened.""" + + +class TextNotFound(Exception): + """Raised when the text is not found in the window.""" + + +def _find_all(a_str: str, sub: str): + start = 0 + while True: + start = a_str.find(sub, start) + if start == -1: + return + yield start + start += len(sub) + + +class ReplacementInfo: + def __init__(self, first_replaced_line: int, n_search_lines: int, n_replace_lines: int, n_replacements: int): + self.first_replaced_line = first_replaced_line + self.n_search_lines = n_search_lines + self.n_replace_lines = n_replace_lines + self.n_replacements = n_replacements + + def __repr__(self): + return f"ReplacementInfo(first_replaced_line={self.first_replaced_line}, n_search_lines={self.n_search_lines}, n_replace_lines={self.n_replace_lines}, n_replacements={self.n_replacements})" + + +class InsertInfo: + def __init__(self, first_inserted_line: int, n_lines_added: int): + self.first_inserted_line = first_inserted_line + self.n_lines_added = n_lines_added + + +class WindowedFile: + def __init__( + self, + path: Optional[Path] = None, + *, + first_line: Optional[int] = None, + window: Optional[int] = None, + exit_on_exception: bool = True, + ): + """ + + Args: + path: Path to the file to open. + first_line: First line of the display window. + window: Number of lines to display. 
+ exit_on_exception: If False, will raise exception. + If true, will print an error message and exit. + + Will create file if not found. + + Internal convention/notes: + + * All line numbers are 0-indexed. + * Previously, we used "current_line" for the internal state + of the window position, pointing to the middle of the window. + Now, we use `first_line` for this purpose (it's simpler this way). + """ + + # We don't need the registry stuff as the tool node should acquire a state + # and the state should include an existing path, or not if the tool is never used. + _path = path + self._exit_on_exception = exit_on_exception + if not _path: + if self._exit_on_exception: + print("No file open. Use the open command first.") + exit(1) + raise FileNotOpened + self.path = Path(_path) + if self.path.is_dir(): + msg = f"Error: {self.path} is a directory. You can only open files. Use cd or ls to navigate directories." + if self._exit_on_exception: + print(msg) + exit(1) + raise IsADirectoryError(msg) + if not self.path.exists(): + msg = f"Error: File {self.path} not found" + if self._exit_on_exception: + print(msg) + exit(1) + raise FileNotFoundError(msg) + + # FIXME: magic number, defaulting window to 10 lines + self.window = 10 + # FIXME: magic number, set to default from swe-agent now + self.overlap = 0 + # Ensure that we get a valid current line by using the setter + self._first_line = 0 + # FIXME: magic number, set to default from swe-agent now + self.first_line = 0 + self.offset_multiplier = 1 / 6 + self._original_text = self.text + self._original_first_line = self.first_line + + def set_window_text(self, new_text: str, *, line_range: Optional[Tuple[int, int]] = None) -> None: + """Replace the text in the current display window with a new string.""" + text = self.text.split("\n") + if line_range is not None: + start, stop = line_range + else: + start, stop = self.line_range + + # Handle empty replacement text (deletion case) + new_lines = new_text.split("\n") if new_text else [] + text[start : stop + 1] = new_lines + self.text = "\n".join(text) + + def insert(self, text: str, line: Optional[int] = None, *, reset_first_line: str = "top") -> "InsertInfo": + # Standardize empty text handling + if not text: + return InsertInfo(first_inserted_line=(self.n_lines if line is None else line), n_lines_added=0) + + # Remove single trailing newline if it exists + text = text[:-1] if text.endswith("\n") else text + + if line is None: + # Append to end of file + if not self.text: + new_text = text + else: + current_text = self.text[:-1] if self.text.endswith("\n") else self.text + new_text = current_text + "\n" + text + insert_line = self.n_lines + elif line < 0: + # Insert at start of file + if not self.text: + new_text = text + else: + current_text = self.text[1:] if self.text.startswith("\n") else self.text + new_text = text + "\n" + current_text + insert_line = 0 + else: + # Insert at specific line + lines = self.text.split("\n") + lines.insert(line, text) + new_text = "\n".join(lines) + insert_line = line + + self.text = new_text + if reset_first_line != "keep": + self.goto(insert_line, mode=reset_first_line) + + return InsertInfo(first_inserted_line=insert_line, n_lines_added=len(text.split("\n"))) + + def replace_in_window( + self, + search: str, + replace: str, + *, + reset_first_line: str = "top", + ) -> "ReplacementInfo": + """Search and replace in the window. + + Args: + search: The string to search for (can be multi-line). + replace: The string to replace it with (can be multi-line). 
+ reset_first_line: If "keep", we keep the current line. Otherwise, we + `goto` the line where the replacement started with this mode. + """ + window_text = self.get_window_text() + # Update line number + index = window_text.find(search) + if index == -1: + if self._exit_on_exception: + print(f"Error: Text not found: {search}") + exit(1) + raise TextNotFound + window_start_line, _ = self.line_range + replace_start_line = window_start_line + len(window_text[:index].split("\n")) - 1 + new_window_text = window_text.replace(search, replace) + self.set_window_text(new_window_text) + if reset_first_line == "keep": + pass + else: + self.goto(replace_start_line, mode=reset_first_line) + return ReplacementInfo( + first_replaced_line=replace_start_line, + n_search_lines=len(search.split("\n")), + n_replace_lines=len(replace.split("\n")), + n_replacements=1, + ) + + def replace(self, search: str, replace: str, *, reset_first_line: str = "top") -> "ReplacementInfo": + indices = list(_find_all(self.text, search)) + if not indices: + if self._exit_on_exception: + print(f"Error: Text not found: {search}") + exit(1) + raise TextNotFound + replace_start_line = len(self.text[: indices[0]].split("\n")) + new_text = self.text.replace(search, replace) + self.text = new_text + if reset_first_line == "keep": + pass + else: + self.goto(replace_start_line, mode=reset_first_line) + return ReplacementInfo( + first_replaced_line=replace_start_line, + n_search_lines=len(search.split("\n")), + n_replace_lines=len(replace.split("\n")), + n_replacements=len(indices), + ) + + def find_all_occurrences(self, search: str, zero_based: bool = True) -> List[int]: + """Returns the line numbers of all occurrences of the search string.""" + indices = list(_find_all(self.text, search)) + line_numbers = [] + for index in indices: + line_no = len(self.text[:index].split("\n")) + if zero_based: + line_numbers.append(line_no - 1) + else: + line_numbers.append(line_no) + return line_numbers + + def undo_edit(self): + self.text = self._original_text + self.first_line = self._original_first_line + + @property + def first_line(self) -> int: + return self._first_line + + @first_line.setter + def first_line(self, value: Union[int, float]): + self._original_first_line = self.first_line + value = int(value) + self._first_line = max(0, min(value, self.n_lines - 1 - self.window)) + + @property + def text(self) -> str: + return self.path.read_text() + + @text.setter + def text(self, new_text: str): + self._original_text = self.text + self.path.write_text(new_text) + + @property + def n_lines(self) -> int: + return len(self.text.splitlines()) + + @property + def line_range(self) -> Tuple[int, int]: + """Return first and last line (inclusive) of the display window, such + that exactly `window` many lines are displayed. + This means `line_range[1] - line_range[0] == window-1` as long as there are + at least `window` lines in the file. `first_line` does the handling + of making sure that we don't go out of bounds. 
+ """ + return self.first_line, min(self.first_line + self.window - 1, self.n_lines - 1) + + def get_window_text( + self, *, line_numbers: bool = False, status_line: bool = False, pre_post_line: bool = False + ) -> str: + """Get the text in the current display window with optional status/extra information + + Args: + line_numbers: include line numbers in the output + status_line: include the status line in the output (file path, total lines) + pre_post_line: include the pre/post line in the output (number of lines above/below) + """ + start_line, end_line = self.line_range + lines = self.text.split("\n")[start_line : end_line + 1] + out_lines = [] + if status_line: + out_lines.append(f"[File: {self.path} ({self.n_lines} lines total)]") + if pre_post_line: + if start_line > 0: + out_lines.append(f"({start_line} more lines above)") + if line_numbers: + out_lines.extend(f"{i + start_line + 1}:{line}" for i, line in enumerate(lines)) + else: + out_lines.extend(lines) + if pre_post_line: + if end_line < self.n_lines - 1: + out_lines.append(f"({self.n_lines - end_line - 1} more lines below)") + return "\n".join(out_lines) + + def print_window(self, *, line_numbers: bool = True, status_line: bool = True, pre_post_line: bool = True): + print(self.get_window_text(line_numbers=line_numbers, status_line=status_line, pre_post_line=pre_post_line)) + + def goto(self, line: int, mode: str = "top"): + if mode == "top": + self.first_line = line - self.window * self.offset_multiplier + else: + raise NotImplementedError + + def scroll(self, n_lines: int): + if n_lines > 0: + self.first_line += n_lines - self.overlap + elif n_lines < 0: + self.first_line += n_lines + self.overlap diff --git a/clients/stratus/tools/wait_tool.py b/clients/stratus/tools/wait_tool.py new file mode 100644 index 0000000..49df892 --- /dev/null +++ b/clients/stratus/tools/wait_tool.py @@ -0,0 +1,35 @@ +import logging +import time +from typing import Annotated + +from langchain_core.messages import ToolMessage +from langchain_core.tools import InjectedToolCallId, tool +from langgraph.types import Command + +wait_tool_docstring = """ +Use this tool to wait for you action to take effect. The upper limit is 120 seconds. + Any value above 120 seconds will be truncated to 120 seconds. If you call this tool + along with other tools in your tool_calls list, this tool will be scheduled to the + last for execution. + + Args: + seconds (int): Number of seconds to wait. +""" + +logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") +logger = logging.getLogger(__name__) + + +@tool(description=wait_tool_docstring) +def wait_tool(seconds: int, tool_call_id: Annotated[str, InjectedToolCallId]) -> Command: + + message = "" + if seconds > 120: + message += ( + f"Request waiting {seconds} sec, but the maximum wait time is 120 sec. " f"Will be truncated to 120 sec." + ) + seconds = max(0, min(seconds, 120)) + time.sleep(seconds) + message += f"wait_tool has been called to wait {seconds} seconds." 
+ logger.info(message) + return Command(update={"messages": [ToolMessage(message, tool_call_id=tool_call_id)]}) diff --git a/clients/stratus/weak_oracles/__init__.py b/clients/stratus/weak_oracles/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/clients/stratus/weak_oracles/base_oracle.py b/clients/stratus/weak_oracles/base_oracle.py new file mode 100644 index 0000000..9e65398 --- /dev/null +++ b/clients/stratus/weak_oracles/base_oracle.py @@ -0,0 +1,20 @@ +from abc import ABC, abstractmethod +from typing import List + + +class OracleResult: + success: bool + issues: List[str] + + def __init__(self, success: bool, issues: List[str]): + self.success = success + self.issues = issues + + def __str__(self): + return f"Your last mitigation attempt [{"has succeeded" if self.success else "has failed"}]. The potential issues are [{"no issues as you have succeeded" if self.success else self.issues}]" + + +class BaseOracle(ABC): + @abstractmethod + async def validate(self, **kwargs) -> OracleResult: + pass diff --git a/clients/stratus/weak_oracles/cluster_state_oracle.py b/clients/stratus/weak_oracles/cluster_state_oracle.py new file mode 100644 index 0000000..7509b8a --- /dev/null +++ b/clients/stratus/weak_oracles/cluster_state_oracle.py @@ -0,0 +1,98 @@ +import os + +from clients.stratus.weak_oracles.base_oracle import BaseOracle, OracleResult + + +class ClusterStateOracle(BaseOracle): + async def validate(self, namespace="default", **kwargs) -> OracleResult: + """ + Validates the Kubernetes cluster status. + + Args: + namespace (str): The namespace to check + + Returns: + dict: A dict containing validation results with 'success' and 'issues' keys + """ + results = {"success": True, "issues": []} + + from kubernetes import client, config + + # Load Kubernetes configuration + if os.path.exists(os.path.expanduser("~/.kube/config")): + config.load_kube_config() + else: + config.load_incluster_config() + + # print(f"Validating cluster status on namespace '{namespace}'...") + + try: + # Initialize Kubernetes API client + v1 = client.CoreV1Api() + + # Get all pods in the namespace + pod_list = v1.list_namespaced_pod(namespace) + + for pod in pod_list.items: + pod_name = pod.metadata.name + pod_issues = [] + + # Skip if pod is being terminated + if pod.metadata.deletion_timestamp: + continue + + # Check pod status + if pod.status.phase not in ["Running", "Succeeded"]: + issue = f"Pod {pod_name} is in {pod.status.phase} state" + pod_issues.append(issue) + results["issues"].append(issue) + results["success"] = False + + # Check container statuses + if pod.status.container_statuses: + for container_status in pod.status.container_statuses: + container_name = container_status.name + + if container_status.state.waiting: + reason = container_status.state.waiting.reason + issue = f"Container {container_name} in pod {pod_name} is waiting: {reason}" + if reason == "CrashLoopBackOff": + issue = f"Container {container_name} is in CrashLoopBackOff" + pod_issues.append(issue) + results["issues"].append(issue) + results["success"] = False + + elif ( + container_status.state.terminated + and container_status.state.terminated.reason != "Completed" + ): + reason = container_status.state.terminated.reason + issue = f"Container {container_name} is terminated with reason: {reason}" + pod_issues.append(issue) + results["issues"].append(issue) + results["success"] = False + + elif not container_status.ready and pod.status.phase == "Running": + issue = f"Container {container_name} is not ready" + 
pod_issues.append(issue) + results["issues"].append(issue) + results["success"] = False + + if pod_issues: + print(f"Issues found with pod {pod_name}:") + for issue in pod_issues: + print(f" - {issue}") + + if results["success"]: + print("All pods are running normally.") + else: + print(f"Found {len(results['issues'])} issues in the cluster.") + + except Exception as e: + results["success"] = False + results["issues"].append(f"Error validating cluster: {str(e)}") + print(f"Error validating cluster: {str(e)}") + + results = OracleResult(success=results["success"], issues=results["issues"]) + + return results diff --git a/clients/stratus/weak_oracles/workload_oracle.py b/clients/stratus/weak_oracles/workload_oracle.py new file mode 100644 index 0000000..8d6781a --- /dev/null +++ b/clients/stratus/weak_oracles/workload_oracle.py @@ -0,0 +1,263 @@ +import asyncio +import time + +import yaml +from kubernetes import client, config +from kubernetes.client import V1JobStatus +from pydantic import ConfigDict, Field + +from clients.stratus.weak_oracles.base_oracle import BaseOracle, OracleResult + +# from aiopslab.generators.workload.wrk import Wrk +from sregym.paths import BASE_DIR, TARGET_MICROSERVICES +from sregym.service.apps.base import Application +from sregym.service.kubectl import KubeCtl + +# from sregym.generators.workload.wrk2 import Wrk2 as Wrk + + +class Wrk: + def __init__(self, rate, dist="norm", connections=2, duration=6, threads=2, latency=True): + self.rate = rate + self.dist = dist + self.connections = connections + self.duration = duration + self.threads = threads + self.latency = latency + + config.load_kube_config() + + self.kubectl = KubeCtl() + + def create_configmap(self, name, namespace, payload_script_path): + with open(payload_script_path, "r") as script_file: + script_content = script_file.read() + + configmap_body = client.V1ConfigMap( + metadata=client.V1ObjectMeta(name=name), + data={payload_script_path.name: script_content}, + ) + + api_instance = client.CoreV1Api() + try: + print(f"Checking for existing ConfigMap '{name}'...") + api_instance.delete_namespaced_config_map(name=name, namespace=namespace) + print(f"ConfigMap '{name}' deleted.") + except client.exceptions.ApiException as e: + if e.status != 404: + print(f"Error deleting ConfigMap '{name}': {e}") + return + + try: + print(f"Creating ConfigMap '{name}'...") + api_instance.create_namespaced_config_map(namespace=namespace, body=configmap_body) + print(f"ConfigMap '{name}' created successfully.") + except client.exceptions.ApiException as e: + print(f"Error creating ConfigMap '{name}': {e}") + + def create_wrk_job(self, job_name, namespace, payload_script, url): + wrk_job_yaml = BASE_DIR / "generators" / "workload" / "wrk-job-template.yaml" + with open(wrk_job_yaml, "r") as f: + job_template = yaml.safe_load(f) + + job_template["metadata"]["name"] = job_name + container = job_template["spec"]["template"]["spec"]["containers"][0] + container["args"] = [ + "wrk", + "-D", + self.dist, + "-t", + str(self.threads), + "-c", + str(self.connections), + "-d", + f"{self.duration}s", + "-L", + "-s", + f"/scripts/{payload_script}", + url, + "-R", + str(self.rate), + ] + + if self.latency: + container["args"].append("--latency") + + job_template["spec"]["template"]["spec"]["volumes"] = [ + { + "name": "wrk2-scripts", + "configMap": {"name": "wrk2-payload-script"}, + } + ] + job_template["spec"]["template"]["spec"]["containers"][0]["volumeMounts"] = [ + { + "name": "wrk2-scripts", + "mountPath": 
f"/scripts/{payload_script}", + "subPath": payload_script, + } + ] + + api_instance = client.BatchV1Api() + try: + existing_job = api_instance.read_namespaced_job(name=job_name, namespace=namespace) + if existing_job: + print(f"Job '{job_name}' already exists. Deleting it...") + api_instance.delete_namespaced_job( + name=job_name, namespace=namespace, body=client.V1DeleteOptions(propagation_policy="Foreground") + ) + time.sleep(5) + except client.exceptions.ApiException as e: + if e.status != 404: + print(f"Error checking for existing job: {e}") + return + + try: + response = api_instance.create_namespaced_job(namespace=namespace, body=job_template) + print(f"Job created: {response.metadata.name}") + except client.exceptions.ApiException as e: + print(f"Error creating job: {e}") + return + + try: + while True: + job_status = api_instance.read_namespaced_job_status(name=job_name, namespace=namespace) + if job_status.status.ready: + print("Job completed successfully.") + break + elif job_status.status.failed: + print("Job failed.") + break + time.sleep(5) + except client.exceptions.ApiException as e: + print(f"Error monitoring job: {e}") + + def start_workload(self, payload_script, url): + namespace = "default" + configmap_name = "wrk2-payload-script" + + self.create_configmap(name=configmap_name, namespace=namespace, payload_script_path=payload_script) + + self.create_wrk_job(job_name="wrk2-job", namespace=namespace, payload_script=payload_script.name, url=url) + + +class WorkloadOracle(BaseOracle): + passable: bool = Field(default=True) + + model_config = ConfigDict(arbitrary_types_allowed=True) + app: Application = Field(default=None, description="Start workload") + core_v1_api: client.CoreV1Api = Field(default=None, description="Kubernetes CoreV1 API client") + batch_v1_api: client.BatchV1Api = Field(default=None, description="Kubernetes BatchV1 API client") + wrk: Wrk = Field(default=None, description="Wrk workload generator") + + def __init__(self, app: Application): + super().__init__() + self.app = app + + config.load_kube_config() + self.core_v1_api = client.CoreV1Api() + self.batch_v1_api = client.BatchV1Api() + self.kubectl = KubeCtl() + + def get_job_logs(self, job_name, namespace): + """Retrieve the logs of a specified job within a namespace.""" + + pods = self.core_v1_api.list_namespaced_pod(namespace, label_selector=f"job-name={job_name}") + print( + pods.items[0].metadata.name, + self.core_v1_api.read_namespaced_pod_log(pods.items[0].metadata.name, namespace), + ) + if len(pods.items) == 0: + raise Exception(f"No pods found for job {job_name} in namespace {namespace}") + return self.core_v1_api.read_namespaced_pod_log(pods.items[0].metadata.name, namespace) + + def get_base_url(self): + # these are assumed to be initialized within the specific app + endpoint = self.kubectl.get_cluster_ip(self.app.frontend_service, self.app.namespace) + return f"http://{endpoint}:{self.app.frontend_port}" + + def get_workloads(self, app_type): + if app_type == "Social Network": + base_dir = TARGET_MICROSERVICES / "socialNetwork/wrk2/scripts/social-network" + return [ + {"payload_script": base_dir / "compose-post.lua", "url": "/wrk2-api/post/compose"}, + {"payload_script": base_dir / "read-home-timeline.lua", "url": "/wrk2-api/home-timeline/read"}, + {"payload_script": base_dir / "read-user-timeline.lua", "url": "/wrk2-api/user-timeline/read"}, + ] + elif app_type == "Hotel Reservation": + base_dir = TARGET_MICROSERVICES / "hotelReservation/wrk2/scripts/hotel-reservation" + return [ + 
{"payload_script": base_dir / "mixed-workload_type_1.lua", "url": ""}, + ] + else: + raise Exception(f"Unknown app type: {app_type}") + + @staticmethod + def is_job_completed(job_status: V1JobStatus) -> bool: + if hasattr(job_status, "conditions") and job_status.conditions is not None: + for condition in job_status.conditions: + if condition.type == "Complete" and condition.status == "True": + return True + return False + + async def get_workload_result(self, job_name): + self.kubectl.wait_for_job_completion(job_name=job_name, namespace="default") + + namespace = "default" + + logs = None + try: + logs = self.get_job_logs( + job_name=job_name, + namespace=namespace, + ) + logs = "\n".join(logs.split("\n")) + except Exception as e: + return f"Workload Generator Error: {e}" + + return logs + + def start_workload(self, payload_script, url, job_name): + namespace = "default" + configmap_name = "wrk2-payload-script" + + self.wrk.create_configmap(name=configmap_name, namespace=namespace, payload_script_path=payload_script) + + self.wrk.create_wrk_job(job_name=job_name, namespace=namespace, payload_script=payload_script.name, url=url) + + async def validate(self) -> OracleResult: + print("Testing workload generator...", flush=True) + self.wrk = Wrk(rate=10, dist="exp", connections=2, duration=10, threads=2) + + result = {"success": True, "issues": []} + + base_url = self.get_base_url() + + for runid, run_config in enumerate(self.get_workloads(self.app.name)): + payload_script = run_config["payload_script"] + url = base_url + run_config["url"] + job_name = f"wrk2-job-{runid}" + + self.start_workload(payload_script, url, job_name) + wrk_result = await self.get_workload_result(job_name) + if ( + "Workload Generator Error:" in wrk_result + or "Requests/sec:" not in wrk_result + or "Transfer/sec:" not in wrk_result + ): + result["issues"].append("Workload Generator Error") + result["success"] = False + break + elif "Non-2xx or 3xx responses:" in wrk_result: + issue = "" + for line in wrk_result.split("\n"): + if "Non-2xx or 3xx responses:" in line: + issue = line + break + result["issues"].append(issue) + result["success"] = False + break + + return OracleResult( + success=result["success"], + issues=[str(result)], + ) diff --git a/dashboard/__init__.py b/dashboard/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/dashboard/assets/utils.js b/dashboard/assets/utils.js new file mode 100644 index 0000000..74cf631 --- /dev/null +++ b/dashboard/assets/utils.js @@ -0,0 +1,17 @@ +window.dash_clientside = Object.assign({}, window.dash_clientside, { + utils: Object.assign({}, (window.dash_clientside || {}).utils, { + scrollToBottom: function(triggerValue) { + try { + // Only scroll if triggerValue is True (new log arrived) + if (triggerValue === true) { + var container = document.getElementById('rows-display'); + if (container) { + container.scrollTop = container.scrollHeight; + } + } + } catch (e) {} + return Date.now(); + } + }) +}); + diff --git a/dashboard/dashboard_app.py b/dashboard/dashboard_app.py new file mode 100644 index 0000000..ce19dc0 --- /dev/null +++ b/dashboard/dashboard_app.py @@ -0,0 +1,1053 @@ +import atexit +import json +import os +import re +import signal +import sys +import time +from datetime import datetime +from pathlib import Path + +import dash +import pandas as pd +import plotly.express as px +import plotly.graph_objs as go +from dash import Input, Output, State, callback, dcc, html +from dash.dependencies import ClientsideFunction + 
+sys.path.append(os.path.join(os.path.dirname(__file__), "..", "sregym", "service")) +import threading +from collections import deque + +import yaml +from flask import request + +from sregym.service.kubectl import KubeCtl + +DASHBOARD_URL = "http://127.0.0.1:11451" + + +class SREGymDashboardServer: + """SREGym Dashboard Server Class""" + + # CONCISE Toggle - when True, hide pods for all-green deployments + CONCISE_TOGGLE = True + + def __init__(self, host="127.0.0.1", port=11451, debug=True): + """ + Initialize the dashboard server + + Args: + host (str): Server host address + port (int): Server port + debug (bool): Enable debug mode + """ + self.host = host + self.port = port + self.debug = debug + + # Initialize Dash application + self.app = dash.Dash(__name__) + + # Log history storage + self.log_history = [] + # External log queue for logs received via API (thread-safe with lock) + self.external_log_queue = deque() + self._log_lock = threading.Lock() + self._state_lock = threading.Lock() + # Structured rows: history of {log, ckpt_state} + self.history_rows: list[dict] = [] + self.latest_log: dict | None = None + self.latest_ckpt_state: dict | None = None + # No extra export data structure; export will render from existing state on exit + + # Setup the application + self._setup_layout() + self._setup_callbacks() + self._setup_api_routes() + self.namespace = "default" + + self.kubectl = KubeCtl() + # self.host_info = self.get_host_info() + + # Register graceful shutdown export hooks + atexit.register(self._export_on_exit) + # Flag to track if we've already handled a shutdown signal + self._signal_handled = False + # Register signal handlers for graceful shutdown + try: + signal.signal(signal.SIGINT, self._handle_signal) # Ctrl-C + signal.signal(signal.SIGTERM, self._handle_signal) # terminate() sends SIGTERM + except Exception: + pass + + # a bit not elegant here to be honest + def get_host_info(self): + # read the script/ansible/inventory.yml, and return the host info + worker_info = {} + with open(os.path.join(os.path.dirname(__file__), "..", "scripts", "ansible", "inventory.yml"), "r") as f: + inventory = yaml.safe_load(f) + + # Extract variables from all.vars + variables = {} + if "all" in inventory and "vars" in inventory["all"]: + variables = inventory["all"]["vars"] + + # get all the workers + if "all" in inventory and "children" in inventory["all"] and "worker_nodes" in inventory["all"]["children"]: + workers = inventory["all"]["children"]["worker_nodes"]["hosts"] + for worker in workers: + ansible_host = workers[worker]["ansible_host"] + ansible_user = workers[worker]["ansible_user"] + + # Replace variables in ansible_user + ansible_user = self._replace_variables(ansible_user, variables) + + # Skip if variables couldn't be resolved + if "{{" in ansible_user: + print(f"Warning: Unresolved variables in {worker} user: {ansible_user}") + continue + + worker_info[ansible_host] = ansible_user + return worker_info + + print(f"No worker nodes found in the inventory file, your cluster is not applicable for this fault injector") + return None + + def _replace_variables(self, value, variables): + """Replace variables in a string with their values""" + for key, val in variables.items(): + value = value.replace(f"{{{{{key}}}}}", str(val)) + return value + + def _setup_api_routes(self): + """Setup API routes for external log ingestion""" + + @self.app.server.route("/api/logs", methods=["POST"]) + def receive_log(): + try: + log_data = request.get_json() + if log_data: + # Convert external log 
format to internal format + content = log_data.get("content", "") + sort = log_data.get("sort", "DUMMY") + log_entry = { + "type": sort, + "content": content, + "timestamp": log_data.get("timestamp", "Fault Extract Timestamp"), + "location": log_data.get("location", ""), + } + print(">>>", log_entry) + with self._log_lock: + self.external_log_queue.append(log_entry) + if sort == "STAGE" and content.startswith("Start"): + # try to find "" + match = re.search(r"<(.*?)>", content) + if match: + self.namespace = match.group(1) + return {"status": "success"}, 200 + except Exception as e: + return {"status": "error", "message": str(e)}, 400 + + def _generate_log_data(self): + """Generate log data from external queue""" + # Check for external logs first + log_entry = None + with self._log_lock: + if self.external_log_queue: + log_entry = self.external_log_queue.popleft() + + if log_entry is not None: + # Add new log to history + self.log_history.append(log_entry) + # Return the latest log for display + return log_entry + # No external logs available, return None to skip this update + return None + + def _drain_log_queue(self) -> list[dict]: + """Drain and return all pending logs from the external queue in FIFO order.""" + drained: list[dict] = [] + with self._log_lock: + while self.external_log_queue: + drained.append(self.external_log_queue.popleft()) + if drained: + self.log_history.extend(drained) + return drained + + def _get_export_dir(self) -> Path: + export_dir = os.getenv("SREGYM_DASH_EXPORT_DIR", ".") + p = Path(export_dir).expanduser().resolve() + try: + p.mkdir(parents=True, exist_ok=True) + except Exception: + p = Path(".").resolve() + return p + + def _build_log_html(self, log_entry: dict) -> str: + type_colors = { + "STAGE": "#ffc107", + "ENV": "#007bff", + "LLM": "#6c757d", + "PROMPT": "#28a745", + "ERROR": "#dc3545", + "EVAL": "#6f42c1", + "SPLIT": "#dee2e6", + "WARNING": "#fd7e14", + } + color = type_colors.get(log_entry.get("type", ""), "#6c757d") + ts = log_entry.get("timestamp", "") + typ = log_entry.get("type", "") + loc = log_entry.get("location", "") + content = (log_entry.get("content", "") or "").split("\n") + content_html = "".join([("
    " if i > 0 else "") + self._escape_html(line) for i, line in enumerate(content)]) + return ( + f'
    ' + f'[{self._escape_html(ts)}] ' + f'{self._escape_html(typ)}' + f'{self._escape_html(loc)}' + f'{content_html}' + f"
    " + ) + + def _escape_html(self, s: str) -> str: + return ( + (s or "") + .replace("&", "&") + .replace("<", "<") + .replace(">", ">") + .replace('"', """) + .replace("'", "'") + ) + + def _build_status_html(self, cluster_data: dict | None) -> str: + if not cluster_data: + return "
    " + nodes = cluster_data.get("nodes", {}) if cluster_data else {} + deployments = cluster_data.get("deployments", {}) if cluster_data else {} + + # Nodes + node_spans: list[str] = [] + for node_name, node_info in nodes.items(): + ready = node_info.get("ready", "Unknown") + issues = node_info.get("issues", []) + if ready == "False": + node_emoji = "🔴" + elif ready == "Unknown": + node_emoji = "❓" + elif ready == "True": + node_emoji = "🟡" if issues else "🟢" + else: + node_emoji = "❓" + issues_text = "" + if issues: + issues_letters = [str(issue)[0].upper() for issue in issues] + issues_text = " " + "".join(issues_letters) + node_spans.append( + f'{self._escape_html(node_name)}' + f'{node_emoji}' + f'{self._escape_html(issues_text)}' + ) + + # Deployments split into 3 columns + deployment_list = list(deployments.items()) + per_col = len(deployment_list) // 3 + (1 if len(deployment_list) % 3 != 0 else 0) + col1 = deployment_list[:per_col] + col2 = deployment_list[per_col : per_col * 2] + col3 = deployment_list[per_col * 2 :] + + def build_col(items: list[tuple[str, dict]]) -> str: + parts: list[str] = [] + for dep_name, dep_data in items: + dep_meta = dep_data.get("deployment_meta", ("0/0/0", "gray")) + pods = dep_data.get("pods", []) + status_text, status_color = dep_meta + dep_emoji = ( + "🟢" + if status_color == "green" + else ("🟡" if status_color == "yellow" else ("🔴" if status_color == "red" else "⚪")) + ) + + pod_lines: list[str] = [] + all_pods_green = bool(pods) and all((p.get("status") == "Running" for p in pods)) + show_pods = not (self.CONCISE_TOGGLE and status_color == "green" and all_pods_green) + if show_pods: + for pod in pods: + node_number = pod.get("node_number", -1) + node_emoji = f"{node_number}️⃣" if 1 <= node_number <= 9 else "❓" + status_letter = (pod.get("status") or "U")[0].upper() + color = pod.get("status_color", "#6c757d") + pod_lines.append( + f'
    ' + f'{node_emoji}' + f'{self._escape_html(pod.get("name", "Unknown"))}' + f'{self._escape_html(status_letter)}' + f"
    " + ) + + parts.append( + f'
    ' + f'{dep_emoji}' + f'{self._escape_html(dep_name)}' + f'{self._escape_html(status_text)}' + f"
    {''.join(pod_lines)}
    " + ) + return "".join(parts) + + col1_html = build_col(col1) + col2_html = build_col(col2) + col3_html = build_col(col3) + + return ( + f'
    ' + f'
    {''.join(node_spans)}
    ' + f'
    ' + f'
    {col1_html}
    ' + f'
    {col2_html}
    ' + f'
    {col3_html}
    ' + f"
    " + ) + + def _build_row_html(self, log_entry: dict, ckpt_state: dict | None, use_realtime_right: bool = False) -> str: + left = self._build_log_html(log_entry) + right = self._build_status_html(ckpt_state) + return ( + f'
    ' + f'
    {left}
    ' + f'
    {right}
    ' + f"
    " + ) + + def _export_on_exit(self): + try: + # Freeze a consistent snapshot + if self._state_lock.acquire(blocking=True): + try: + # Drain any remaining logs and append to log_history; build export from log_history + remaining = self._drain_log_queue() + final_snapshot = self._collect_cluster_data(self.namespace) + + export_dir = self._get_export_dir() + ts = datetime.now().strftime("%Y%m%d_%H%M%S") + out_file = export_dir / f"sregym_dashboard_{ts}.html" + + rows_html: list[str] = [] + # Build from log_history in order; use final snapshot for right side + for log_entry in self.log_history: + if log_entry.get("type") == "SPLIT": + rows_html.append( + '

    ' + ) + else: + rows_html.append(self._build_row_html(log_entry, final_snapshot)) + + # Latest row with its saved checkpoint if exists + if self.latest_log is not None: + rows_html.append(self._build_row_html(self.latest_log or {}, final_snapshot)) + + html_doc = ( + '' + '' + "SREGym Dashboard Export" + "" + f"
    {''.join(rows_html)}
    " + '
    ' + '

    © 2025 SREGym Dashboard - xlab, UIUC

    ' + "
    " + "" + ) + with open(out_file, "w", encoding="utf-8") as f: + f.write(html_doc) + print(f"[SREGym Dashboard] Exported HTML to: {out_file}") + finally: + try: + self._state_lock.release() + except Exception: + pass + except Exception as e: + print(f"[SREGym Dashboard] Export on exit failed: {e}") + + def _handle_signal(self, signum, frame): + """ + Handle shutdown signals (SIGINT, SIGTERM) gracefully. + Exports trace data before exiting. + """ + if self._signal_handled: + # Already handled a shutdown signal, ignore subsequent ones + signal_name = ( + "SIGINT" if signum == signal.SIGINT else "SIGTERM" if signum == signal.SIGTERM else f"signal {signum}" + ) + print(f"[SREGym Dashboard] Ignoring subsequent {signal_name} signal...") + return + + # Mark that we've handled the signal + self._signal_handled = True + + signal_name = ( + "SIGINT" if signum == signal.SIGINT else "SIGTERM" if signum == signal.SIGTERM else f"signal {signum}" + ) + print(f"[SREGym Dashboard] Caught {signal_name}, exporting current view...") + + try: + # Export trace data before exiting + self._export_on_exit() + print(f"[SREGym Dashboard] Export completed, shutting down...") + except Exception as e: + print(f"[SREGym Dashboard] Error during export: {e}", file=sys.stderr) + finally: + # Use os._exit() to ensure immediate termination after export + # This is important for daemon processes + os._exit(0) + + def _parse_concise_deployment_info(self, info_str): + """Parse the concise deployment info from kubectl get deployment -o wide output""" + if not info_str or not info_str.strip(): + return {} + + lines = info_str.strip().split("\n") + if len(lines) < 2: + return {} + + # Skip the header line + data_lines = lines[1:] + deployments = {} + + for line in data_lines: + if not line.strip(): + continue + + # Split by whitespace + parts = line.split() + if len(parts) < 4: # Minimum required fields: NAME, READY, UP-TO-DATE, AVAILABLE + continue + + name = parts[0] + ready = parts[1] # Format: X/Y + available = parts[3] # AVAILABLE column + + # Create the format: {name: A/X/Y} + real, expected = ready.split("/") + if available == 0: # red + color = "red" + elif available == expected: # green + color = "green" + else: # yellow + color = "yellow" + deployments[name] = (f"{available}/{ready}", color) + + return deployments + + def _parse_concise_pod_info(self, info_str): + """Parse the concise pod info from kubectl get pod -o wide output""" + # print(info_str) + # print("\n\n\n\n") + if not info_str or not info_str.strip(): + return [] + + lines = info_str.strip().split("\n") + if len(lines) < 2: + return [] + + # Skip the header line + data_lines = lines[1:] + pods_info = [] + + for line in data_lines: + if not line.strip(): + continue + + # Split by whitespace, but handle the case where some fields might be empty + parts = line.split() + if len(parts) < 8: # Minimum required fields + continue + + pod_info = { + "name": parts[0], + "ready": parts[1], + "status": parts[2], + "node": parts[6], + } + + # only take the last 5 hash from the name + # catch the part after the last two - + parts = pod_info["name"].split("-")[:-2] + # print(parts) + pod_info["deployment_name"] = "" + for part in parts: + pod_info["deployment_name"] += part + if part != parts[-1]: + pod_info["deployment_name"] += "-" + pod_info["name"] = pod_info["name"][-5:] + + # Determine status color/emoji + status = pod_info["status"] + pod_info["color"] = "000000" + if status == "Running": # green + pod_info["status_color"] = "#28a745" + elif status in 
["Pending", "ContainerCreating", "Terminating"]: # yellow + pod_info["status_color"] = "#ffc107" + elif status in ["Failed", "Error", "CrashLoopBackOff"]: # red + pod_info["status_color"] = "#dc3545" + else: # gray + pod_info["status_color"] = "#6c757d" + + # extract node number + try: + pod_info["node_number"] = int(pod_info["node"].split(".")[0][-1]) + except: + pod_info["node_number"] = -1 + + pods_info.append(pod_info) + + # extract deployment name + + return pods_info + + def _collect_cluster_data(self, app_namespace): + """collect the cluster data""" + try: + deployments = self.kubectl.get_concise_deployments_info(app_namespace) + pods_raw = self.kubectl.get_concise_pods_info(app_namespace) + nodes = self.kubectl.list_nodes() + + # Process nodes information + nodes_info = {} + if nodes and hasattr(nodes, "items"): + for node in nodes.items: + node_dict = node.to_dict() if hasattr(node, "to_dict") else node + status = node_dict.get("status", {}) + conditions = status.get("conditions", []) + + node_info = { + "ready": "Unknown", + "issues": [], + } + + for condition in conditions: + if condition.get("type") == "Ready": + node_info["ready"] = condition.get("status", "Unknown") + else: + if condition.get("status") == "True": + node_info["issues"].append(condition.get("type", "Unknown")) + + nodes_info[node_dict.get("metadata", {}).get("name", "Unknown").split(".")[0]] = node_info + + pods_info = self._parse_concise_pod_info(pods_raw) + # print(pods_info) + # print("\n\n\n\n") + deployments_info = self._parse_concise_deployment_info(deployments) if deployments else {} + deployments_info_with_pods = {} + + for deployment in deployments_info.keys(): + deployments_info_with_pods[deployment] = { + "pods": [pod for pod in pods_info if pod["deployment_name"] == deployment], + "deployment_meta": deployments_info[deployment], + } + + return {"nodes": nodes_info, "deployments": deployments_info_with_pods, "namespace": app_namespace} + + except Exception as e: + print(f"Error collecting cluster data: {str(e)}") + return {"pods": [], "nodes": {}, "deployments": {}, "namespace": app_namespace, "error": str(e)} + + def _render_log_block(self, log_entry: dict) -> html.Div: + """Render a single log entry as a colored block based on its type.""" + type_colors = { + "STAGE": "#ffc107", # yellow + "ENV": "#007bff", # blue + "LLM": "#6c757d", # gray + "PROMPT": "#28a745", # green + "ERROR": "#dc3545", # red + "EVAL": "#6f42c1", # purple + "SPLIT": "#dee2e6", # light gray + "WARNING": "#fd7e14", # orange-red + } + color = type_colors.get(log_entry.get("type", ""), "#6c757d") + container_style = { + "background-color": color + "20", + "border": f"1px solid {color}", + "border-radius": "8px", + "padding": "8px", + "font-family": "Consolas", + } + + # Split content by newlines and create proper line breaks + content_lines = log_entry.get("content", "").split("\n") + + # Build full content elements + full_elements = [] + for i, line in enumerate(content_lines): + if i > 0: + full_elements.append(html.Br()) + full_elements.append(line) + + # If more than 6 lines, render a collapsible block with a preview (first 6 lines) + if len(content_lines) > 6: + preview_elements = [] + for i, line in enumerate(content_lines[:6]): + if i > 0: + preview_elements.append(html.Br()) + preview_elements.append(line) + # Add an ellipsis hint in the preview + preview_elements.append(html.Span(" …", style={"color": "#6c757d"})) + + content_component = html.Details( + [ + html.Summary( + html.Span( + preview_elements, style={"color": 
"#2c3e50", "font-family": "Consolas", "font-size": "12px"} + ) + ), + html.Div( + full_elements, + style={"color": "#2c3e50", "font-family": "Consolas", "font-size": "12px", "marginTop": "6px"}, + ), + ] + ) + else: + content_component = html.Span( + full_elements, style={"color": "#2c3e50", "font-family": "Consolas", "font-size": "12px"} + ) + + return html.Div( + [ + html.Span(f"[{log_entry.get('timestamp', '')}] ", style={"color": "#6c757d"}), + html.Span( + log_entry.get("type", ""), + style={ + "color": color, + "font-weight": "bold", + "background-color": color + "20", + "padding": "2px 6px", + "border-radius": "4px", + "margin-right": "8px", + "font-family": "Consolas", + "font-size": "12px", + }, + ), + html.Span( + f"{log_entry.get('location', '')}", + style={"color": "#6c757d", "font-family": "Consolas", "font-size": "11px", "margin-right": "8px"}, + ), + content_component, + ], + style=container_style, + ) + + def _render_split_line(self) -> html.Div: + """Render a simple split line for SPLIT type logs in history.""" + return html.Div( + [html.Hr(style={"border": "none", "border-top": "2px solid #dee2e6", "margin": "10px 0", "width": "100%"})], + style={"width": "100%", "text-align": "center"}, + ) + + def _render_status_block(self, cluster_data: dict) -> html.Div: + """Render the status block (nodes + deployments) given cluster_data.""" + nodes = cluster_data.get("nodes", {}) if cluster_data else {} + deployments = cluster_data.get("deployments", {}) if cluster_data else {} + + # Nodes row + node_items = [] + for node_name, node_info in nodes.items(): + ready = node_info.get("ready", "Unknown") + issues = node_info.get("issues", []) + if ready == "False": + node_emoji = "🔴" + elif ready == "Unknown": + node_emoji = "❓" + elif ready == "True": + node_emoji = "🟡" if issues else "🟢" + else: + node_emoji = "❓" + issues_text = "" + if issues: + issues_letters = [issue[0].upper() for issue in issues] + issues_text = f" {''.join(issues_letters)}" + node_items.append( + html.Span( + [ + html.Span( + node_name, style={"margin-right": "3px", "font-size": "12px", "font-family": "Consolas"} + ), + html.Span( + node_emoji, style={"margin-right": "3px", "font-size": "10px", "font-family": "Consolas"} + ), + html.Strong( + issues_text, style={"color": "#ffc107", "font-size": "10px", "font-family": "Consolas"} + ), + ], + style={"margin-right": "10px"}, + ) + ) + + # Deployments 3 columns + deployment_list = list(deployments.items()) + per_col = len(deployment_list) // 3 + (1 if len(deployment_list) % 3 != 0 else 0) + col1 = deployment_list[:per_col] + col2 = deployment_list[per_col : per_col * 2] + col3 = deployment_list[per_col * 2 :] + + def build_col(items: list[tuple[str, dict]]) -> list: + column_items: list = [] + for dep_name, dep_data in items: + dep_meta = dep_data.get("deployment_meta", ("0/0/0", "gray")) + pods = dep_data.get("pods", []) + status_text, status_color = dep_meta + dep_emoji = ( + "🟢" + if status_color == "green" + else ("🟡" if status_color == "yellow" else ("🔴" if status_color == "red" else "⚪")) + ) + pod_items = [] + # Concise: hide pods if deployment is green and all pods are Running + all_pods_green = bool(pods) and all(p.get("status") == "Running" for p in pods) + show_pods = not (self.CONCISE_TOGGLE and status_color == "green" and all_pods_green) + if show_pods: + for pod in pods: + node_number = pod.get("node_number", -1) + node_emoji = f"{node_number}️⃣" if 1 <= node_number <= 9 else "❓" + status_letter = (pod.get("status") or "U")[0].upper() + 
pod_items.append( + html.Div( + [ + html.Span( + node_emoji, + style={"margin-right": "3px", "font-size": "10px", "font-family": "Consolas"}, + ), + html.Span( + pod.get("name", "Unknown"), + style={"margin-right": "3px", "font-size": "10px", "font-family": "Consolas"}, + ), + html.Strong( + status_letter, + style={ + "color": pod.get("status_color", "#6c757d"), + "font-weight": "bold", + "font-size": "10px", + "font-family": "Consolas", + }, + ), + ], + style={"margin-left": "15px", "margin-bottom": "1px"}, + ) + ) + column_items.append( + html.Div( + [ + html.Div( + [ + html.Span( + dep_emoji, + style={"margin-right": "3px", "font-size": "10px", "font-family": "Consolas"}, + ), + html.Strong( + dep_name, + style={"margin-right": "3px", "font-size": "11px", "font-family": "Consolas"}, + ), + html.Span( + status_text, + style={"color": "#6c757d", "font-size": "10px", "font-family": "Consolas"}, + ), + ], + style={"margin-bottom": "2px"}, + ), + html.Div(pod_items), + ], + style={"margin-bottom": "5px"}, + ) + ) + return column_items + + col1_items = build_col(col1) + col2_items = build_col(col2) + col3_items = build_col(col3) + + return html.Div( + [ + html.Div(node_items, style={"margin-bottom": "8px"}), + html.Div( + [ + html.Div( + col1_items, + style={ + "width": "33.33%", + "float": "left", + "padding-right": "5px", + "box-sizing": "border-box", + }, + ), + html.Div( + col2_items, + style={ + "width": "33.33%", + "float": "left", + "padding-right": "5px", + "box-sizing": "border-box", + }, + ), + html.Div(col3_items, style={"width": "33.33%", "float": "left", "box-sizing": "border-box"}), + ], + style={"width": "100%", "overflow": "hidden"}, + ), + ], + style={ + "border": "1px solid #e9ecef", + "border-radius": "8px", + "padding": "8px", + "background-color": "#ffffff", + }, + ) + + def _setup_layout(self): + """Setup the application layout""" + self.app.layout = html.Div( + [ + # Main container: multiple rows (log + status) + html.Div( + [ + html.Div( + id="rows-display", + style={ + "width": "100%", + "box-sizing": "border-box", + "max-height": "70vh", + "overflowY": "auto", + "scrollBehavior": "smooth", + }, + ), + dcc.Store(id="scroll-anchor"), + dcc.Store(id="new-log-trigger", data=None), + ], + style={"width": "100%", "overflow": "hidden", "margin-bottom": "20px"}, + ), + # Footer area + html.Div( + [ + html.P( + "© 2025 SREGym Dashboard - xlab, UIUC", + style={"text-align": "center", "color": "#6c757d", "margin": "0", "padding": "10px"}, + ) + ], + style={ + "background-color": "#f8f9fa", + "border-top": "1px solid #e9ecef", + "margin-top": "20px", + "min-height": "40px", + "display": "flex", + "align-items": "center", + "justify-content": "center", + }, + ), + # Timer component + dcc.Interval(id="interval-component", interval=3000, n_intervals=0), # Update every 3 seconds + ], + style={ + "font-family": "Arial, sans-serif", + "margin": "0", + "padding": "20px", + "background-color": "#ffffff", + "min-height": "100vh", + }, + ) + + def _setup_callbacks(self): + """Setup the application callbacks""" + + # Auto-scroll rows container to bottom only when new log arrives + self.app.clientside_callback( + ClientsideFunction(namespace="utils", function_name="scrollToBottom"), + Output("scroll-anchor", "data"), + Input("new-log-trigger", "data"), + ) + + @self.app.callback( + [Output("rows-display", "children"), Output("new-log-trigger", "data")], + Input("interval-component", "n_intervals"), + State("rows-display", "children"), + ) + def update_rows(n, current_children): + 
"""Concurrency-safe render: build from server state only, ignore current_children.""" + # Always fetch realtime cluster state once per tick + # If another invocation is updating, skip this tick to avoid piling up + if self._state_lock.locked(): + return dash.no_update, None + print(f"<<<<<<<<<< Try update rows: {n}, children: {len(current_children) if current_children else 0}") + + # Collect realtime cluster data outside of lock (can be slow) + + # Try to enter critical section without blocking + if not self._state_lock.acquire(blocking=False): + return dash.no_update, None + try: + + realtime_state = self._collect_cluster_data(self.namespace) + # Drain all pending logs under the state lock to preserve ordering + new_logs = self._drain_log_queue() + print( + f"<<<<<<<<<< Entered critical section, Fetched New logs: {len(new_logs)}, children: {len(current_children) if current_children else 0}" + ) + # No new logs: refresh only the live row if exists + if not new_logs: + if self.latest_log is not None: + latest_row = html.Div( + [ + html.Div( + self._render_log_block(self.latest_log), + style={ + "width": "50%", + "float": "left", + "padding": "4px", + "box-sizing": "border-box", + }, + ), + html.Div( + self._render_status_block(realtime_state), + style={ + "width": "50%", + "float": "left", + "padding": "4px", + "box-sizing": "border-box", + }, + ), + ], + style={"width": "100%", "overflow": "hidden"}, + ) + children = list(self.history_rows) + children.append(latest_row) + return children, None + # No latest log yet, just return history + print(f"<<<<<<<<<< No latest log yet, just return history") + return list(self.history_rows), None + + # New logs arrived: snapshot cluster state as the checkpoint for this batch + snapshot_state = realtime_state + + # If there was a previous latest log, push it to history using its checkpoint + if self.latest_log is not None and self.latest_ckpt_state is not None: + if self.latest_log.get("type") == "SPLIT": + self.history_rows.append(self._render_split_line()) + else: + self.history_rows.append( + html.Div( + [ + html.Div( + self._render_log_block(self.latest_log), + style={ + "width": "50%", + "float": "left", + "padding": "4px", + "box-sizing": "border-box", + }, + ), + html.Div( + self._render_status_block(self.latest_ckpt_state), + style={ + "width": "50%", + "float": "left", + "padding": "4px", + "box-sizing": "border-box", + }, + ), + ], + style={"width": "100%", "overflow": "hidden", "margin-bottom": "6px"}, + ) + ) + + # Push all but the newest drained logs into history using the batch snapshot + if len(new_logs) > 1: + for queued_log in new_logs[:-1]: + if queued_log.get("type") == "SPLIT": + self.history_rows.append(self._render_split_line()) + else: + self.history_rows.append( + html.Div( + [ + html.Div( + self._render_log_block(queued_log), + style={ + "width": "50%", + "float": "left", + "padding": "4px", + "box-sizing": "border-box", + }, + ), + html.Div( + self._render_status_block(snapshot_state), + style={ + "width": "50%", + "float": "left", + "padding": "4px", + "box-sizing": "border-box", + }, + ), + ], + style={"width": "100%", "overflow": "hidden", "margin-bottom": "6px"}, + ) + ) + + # Update latest pointers with the newest log in this batch + self.latest_log = new_logs[-1] + self.latest_ckpt_state = snapshot_state + + # Build the new latest row with real-time state on the right + latest_row = html.Div( + [ + html.Div( + self._render_log_block(self.latest_log), + style={"width": "50%", "float": "left", "padding": "4px", 
"box-sizing": "border-box"}, + ), + html.Div( + self._render_status_block(realtime_state), + style={"width": "50%", "float": "left", "padding": "4px", "box-sizing": "border-box"}, + ), + ], + style={"width": "100%", "overflow": "hidden"}, + ) + + children = list(self.history_rows) + children.append(latest_row) + print(f"<<<<<<<<<< now children list has: {len(children)}") + return children, True + finally: + self._state_lock.release() + + def get_log_history(self): + """Get the complete log history""" + return self.log_history + + def clear_log_history(self): + """Clear the log history""" + self.log_history = [] + + def run(self, threaded=False): + """Start the dashboard server""" + print(f"Starting SREGym Dashboard on {self.host}:{self.port}") + in_main_thread = threading.current_thread() is threading.main_thread() + if threaded or not in_main_thread: + # When running in a thread, disable debug reloader and signals + self.app.run( + debug=False, + host=self.host, + port=self.port, + use_reloader=False, + # dev_tools_silence_routes_logging=True, + threaded=True, + ) + else: + # Main thread: allow normal debug behavior + # Disable reloader so Ctrl-C cleanly stops the server + self.app.run( + debug=self.debug, + host=self.host, + port=self.port, + use_reloader=False, + threaded=True, + # dev_tools_silence_routes_logging=True, + ) + + +if __name__ == "__main__": + # Create and run the dashboard server + dashboard = SREGymDashboardServer(host="127.0.0.1", port=11451, debug=True) + dashboard.run() diff --git a/dashboard/proxy.py b/dashboard/proxy.py new file mode 100644 index 0000000..5d66dee --- /dev/null +++ b/dashboard/proxy.py @@ -0,0 +1,97 @@ +from logging import Handler +import requests +import re +from dashboard.dashboard_app import DASHBOARD_URL +from datetime import datetime +from typing import Dict, Any + +class LogProxy(Handler): + # This proxy is used to proxy the logs to the dashboard + + def __init__(self): + super().__init__() + self.dashboard_url = DASHBOARD_URL.rstrip('/') + self.log_endpoint = f"{self.dashboard_url}/api/logs" + + def emit(self, record): + """ + Parse log record and send to dashboard + + Expected log format: "[SORT] xxxxxxxxxxxx" + """ + try: + # Get the log message + log_message = self.format(record) + + # Parse the log message + parsed_log = self._parse_log_message(log_message, record) + + # Send to dashboard + self._send_to_dashboard(parsed_log) + + except Exception as e: + # Avoid infinite recursion by not logging errors + print(f"LogProxy error: {e}") + + def _parse_log_message(self, log_message: str, record) -> Dict[str, Any]: + """ + Parse log message in format "[SORT] xxxxxxxxxxxx" + + Returns: + Dict with keys: timestamp, location, sort, content + """ + # Extract timestamp from record + timestamp = datetime.fromtimestamp(record.created).strftime('%Y-%m-%d %H:%M:%S') + + # Extract location (filename:line number) + location = f"{record.filename}:{record.lineno}" + + # Parse the log message for [SORT] pattern + sort_match = re.match(r'\[([^\]]+)\]\s*(.*)', log_message, re.DOTALL) + + if sort_match: + sort = sort_match.group(1).strip() + content = sort_match.group(2).strip() + else: + # If no [SORT] pattern found, use record level as sort + sort = record.levelname + content = log_message + + return { + 'timestamp': timestamp, + 'location': location, + 'sort': sort, + 'content': content + } + + def _send_to_dashboard(self, log_data: Dict[str, Any]): + """ + Send parsed log data to dashboard via HTTP POST + """ + try: + response = requests.post( + 
self.log_endpoint, + json=log_data, + timeout=1.0 # Short timeout to avoid blocking + ) + response.raise_for_status() + except requests.exceptions.RequestException: + # Silently fail to avoid infinite logging loops + pass + + +# Example usage: +# import logging +# from dashboard.proxy import LogProxy +# +# logger = logging.getLogger('my_app') +# logger.setLevel(logging.INFO) +# +# # Add LogProxy handler +# log_proxy = LogProxy(dashboard_url='http://0.0.0.0:8050') +# logger.addHandler(log_proxy) +# +# # Now log messages in format "[SORT] content" will be sent to dashboard +# logger.info("[STAGE] Application starting up") +# logger.error("[ERROR] Database connection failed") + \ No newline at end of file diff --git a/kind/.dockerignore b/kind/.dockerignore new file mode 100644 index 0000000..42061c0 --- /dev/null +++ b/kind/.dockerignore @@ -0,0 +1 @@ +README.md \ No newline at end of file diff --git a/kind/Dockerfile b/kind/Dockerfile new file mode 100644 index 0000000..ee83ec3 --- /dev/null +++ b/kind/Dockerfile @@ -0,0 +1,4 @@ +FROM kindest/node:v1.32.1 + +# Install udev and related packages +RUN apt-get update && apt-get install -y udev socat \ No newline at end of file diff --git a/kind/README.md b/kind/README.md new file mode 100644 index 0000000..b69f16e --- /dev/null +++ b/kind/README.md @@ -0,0 +1,162 @@ +# Building your own image + +This document provides detailed, step-by-step instructions for building your own kind image to deploy SREGym. + +--- + +## Table of Contents + +- [Overview](#overview) +- [System Compatibility](#system-compatibility) +- [Prerequisites](#prerequisites) + - [WSL2 and Ubuntu Setup](#wsl2-and-ubuntu-setup) + - [Install Docker](#install-docker) + - [Install kind](#install-kind) + - [Install kubectl](#install-kubectl) + - [Install Helm](#install-helm) +- [Deployment Steps](#deployment-steps) + - [1. Build the Custom KIND Image](#1-build-the-custom-kind-image) + - [2. (Optional) Push the Image to Dockerhub](#2-optional-push-the-image-to-dockerhub) + - [3. Create a kind Kubernetes Cluster](#4-create-a-kind-kubernetes-cluster) +- [Troubleshooting](#troubleshooting) +- [Conclusion](#conclusion) + +--- + +## **Overview** +SREGym is deployed using **containerized components** and Kubernetes manifests. This guide provides a step-by-step deployment process, covering: + +- Setting up **WSL2 (Windows Subsystem for Linux) or native Ubuntu 24.04**. +- Installing **Docker, kind, kubectl, Helm, Lua, Luarocks, and Luasocket**. +- Building a custom **kind image** and deploying SREGym into a **local Kubernetes cluster**. + +--- + +## **System Compatibility** +This setup has been successfully verified on the following environments: +1. **WSL2 Ubuntu** + ``` + Linux Warrens-Laptop 5.15.167.4-microsoft-standard-WSL2 #1 SMP Tue Nov 5 00:21:55 UTC 2024 x86_64 GNU/Linux + ``` +2. **Ubuntu 24.04 LTS (Cloud/Local Machine)** + ``` + Linux ubuntu-s-4vcpu-8gb-sfo3-01 6.8.0-52-generic #53-Ubuntu SMP PREEMPT_DYNAMIC Sat Jan 11 00:06:25 UTC 2025 x86_64 GNU/Linux + ``` + +--- + +## **Prerequisites** + +### **WSL2 and Ubuntu Setup** +Ensure that WSL2 is enabled on Windows and Ubuntu is installed. Follow the official [Microsoft WSL guide](https://learn.microsoft.com/en-us/windows/wsl/install). + + +### **Install Docker** +Docker is required to run Kubernetes and containers. Install Docker Desktop for WSL2 if using WSL2 Ubuntu, follow the [official Docker WSL documentation](https://docs.docker.com/desktop/wsl/). 
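+
+Whichever route you take, it is worth confirming that the Docker daemon is reachable from your Ubuntu shell before continuing. A quick sanity check (the `hello-world` image is Docker's standard test image):
+
+```bash
+docker --version
+docker run --rm hello-world
+```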
+ +For native Ubuntu, install Docker using the following commands, follow the [official Docker installation guide](https://docs.docker.com/engine/install/ubuntu/). + +### **Install kind** +Install kind (Kubernetes IN Docker) to create a local Kubernetes cluster: +```bash +curl -Lo ./kind https://kind.sigs.k8s.io/dl/v0.27.0/kind-linux-amd64 +chmod +x ./kind +sudo mv ./kind /usr/local/bin/kind +``` + +For more installation options and documentation, see [kind documentation](https://kind.sigs.k8s.io/docs/user/quick-start/). +### **Install kubectl** +Install **kubectl** to interact with Kubernetes clusters: +```bash +sudo apt-get update +# apt-transport-https may be a dummy package; if so, you can skip that package +sudo apt-get install -y apt-transport-https ca-certificates curl gnupg + +# If the folder `/etc/apt/keyrings` does not exist, it should be created before the curl command, read the note below. +# sudo mkdir -p -m 755 /etc/apt/keyrings +curl -fsSL https://pkgs.k8s.io/core:/stable:/v1.32/deb/Release.key | sudo gpg --dearmor -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg +sudo chmod 644 /etc/apt/keyrings/kubernetes-apt-keyring.gpg # allow unprivileged APT programs to read this keyring + +# This overwrites any existing configuration in /etc/apt/sources.list.d/kubernetes.list +echo 'deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v1.32/deb/ /' | sudo tee /etc/apt/sources.list.d/kubernetes.list +sudo chmod 644 /etc/apt/sources.list.d/kubernetes.list # helps tools such as command-not-found to work correctly + +sudo apt-get update +sudo apt-get install -y kubectl +``` + +For further guidance, refer to [kubectl linux installation docs](https://kubernetes.io/docs/tasks/tools/install-kubectl-linux/). + +### Install Helm + +Install Helm to manage Kubernetes applications: + +```bash +curl https://baltocdn.com/helm/signing.asc | gpg --dearmor | sudo tee /usr/share/keyrings/helm.gpg > /dev/null +sudo apt-get install apt-transport-https --yes +echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/helm.gpg] https://baltocdn.com/helm/stable/debian/ all main" | sudo tee /etc/apt/sources.list.d/helm-stable-debian.list +sudo apt-get update +sudo apt-get install helm +``` + +For more information, see the [Helm installation guide](https://helm.sh/docs/intro/install/). + +--- + +## Deployment Steps + +### 1. Build the Custom KIND Image + +The Dockerfile in this directory is designed specifically for Ubuntu running under WSL2 (amd64). **Please refer to this Dockerfile** to build an image that is compatible with your own machine + +Build the custom image using: + +```bash +docker build -t your_dockerhub_username/aiopslab-kind:latest -f kind/Dockerfile . +``` +> **Note:** Replace `your_dockerhub_username` with your Docker Hub account if pushing the image. + +### 2. (Optional) Push the Image to Dockerhub + +If you wish to publish your custom image and have it referenced by the kind configuration file, push it to Docker Hub: + +```bash +docker push your_dockerhub_username/aiopslab-kind:latest +``` + +Remember to update the `kind-config.yaml` file with your image name if you are using your own published image. + + +After finishing cluster creation, proceed to the next "Update config.yml" step. + +--- + +## **Troubleshooting** + +- **Docker Issues:** + Ensure Docker is running within your WSL2 environment. Verify with `docker ps` to list running containers. 
+ +- **Cluster Creation Failures:** + Check that Docker is correctly installed and that your system has enough resources (CPU, memory). Examine the output of `kind export logs ` for details. + +- **Deployment Problems:** + Use `kubectl logs ` to view pod logs and diagnose application issues. Make sure that your `kind-config.yaml` file references the correct image. + +- **Resource Allocation:** + WSL2 may require additional resources. Adjust the WSL2 settings in your `.wslconfig` file on Windows if you encounter performance issues. + +- **Deployment Timeout Issues (Slow Network):** + If you have a slow local network connection, first-time deployments may timeout while pulling container images. Increase the timeout in your `.env` file: + + ```bash + WAIT_FOR_POD_READY_TIMEOUT=1800 # 30 minutes (recommended for slow networks) + ``` + + Subsequent deployments are faster since images are cached. Remote clusters typically don't need this adjustment. + +--- + +## **Conclusion** +This guide covers deploying **SREGym** on **both WSL2 and Ubuntu 24.04**, ensuring compatibility across different environments. By following these steps, you can successfully set up **Docker, kind, and Kubernetes** and deploy the SREGym application. + +For advanced configurations, refer to the [SREGym documentation](https://github.com/SREGym/SREGym). 🚀 diff --git a/kind/kind-config-arm.yaml b/kind/kind-config-arm.yaml new file mode 100644 index 0000000..d243730 --- /dev/null +++ b/kind/kind-config-arm.yaml @@ -0,0 +1,29 @@ +kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 +# uncomment below content to enable nodes to pull images from our exclusive docker registry +#containerdConfigPatches: +# - | +# [plugins."io.containerd.grpc.v1.cri".registry.mirrors."docker.io"] +# endpoint = ["http://128.105.144.30:5000"] +nodes: + - role: control-plane + image: jacksonarthurclark/aiopslab-kind-arm:latest + extraMounts: + - hostPath: /run/udev + containerPath: /run/udev + - role: worker + image: jacksonarthurclark/aiopslab-kind-arm:latest + extraMounts: + - hostPath: /run/udev + containerPath: /run/udev + - role: worker + image: jacksonarthurclark/aiopslab-kind-arm:latest + extraMounts: + - hostPath: /run/udev + containerPath: /run/udev + - role: worker + image: jacksonarthurclark/aiopslab-kind-arm:latest + extraMounts: + - hostPath: /run/udev + containerPath: /run/udev + \ No newline at end of file diff --git a/kind/kind-config-x86.yaml b/kind/kind-config-x86.yaml new file mode 100644 index 0000000..9d54b49 --- /dev/null +++ b/kind/kind-config-x86.yaml @@ -0,0 +1,29 @@ +kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 +# uncomment below content to enable nodes to pull images from our exclusive docker registry +#containerdConfigPatches: +# - | +# [plugins."io.containerd.grpc.v1.cri".registry.mirrors."docker.io"] +# endpoint = ["http://128.105.144.30:5000"] +nodes: + - role: control-plane + image: jacksonarthurclark/aiopslab-kind-x86:latest + extraMounts: + - hostPath: /run/udev + containerPath: /run/udev + - role: worker + image: jacksonarthurclark/aiopslab-kind-x86:latest + extraMounts: + - hostPath: /run/udev + containerPath: /run/udev + - role: worker + image: jacksonarthurclark/aiopslab-kind-x86:latest + extraMounts: + - hostPath: /run/udev + containerPath: /run/udev + - role: worker + image: jacksonarthurclark/aiopslab-kind-x86:latest + extraMounts: + - hostPath: /run/udev + containerPath: /run/udev + \ No newline at end of file diff --git a/logger/__init__.py b/logger/__init__.py new file mode 100644 index 0000000..ffc96ed --- 
/dev/null +++ b/logger/__init__.py @@ -0,0 +1,78 @@ +import logging +import os +from .handler import ExhaustInfoFormatter, ColorFormatter +from logging import Formatter +from datetime import datetime +from dashboard.proxy import LogProxy + + +def get_current_datetime_formatted(): + now = datetime.now() + formatted_datetime = now.strftime("%m-%d_%H-%M") + return formatted_datetime + +def init_logger(): + # set up the logger for dashboard + logging.getLogger('sregym-global').setLevel(logging.INFO) + logging.getLogger('sregym-global').addHandler(LogProxy()) + logging.getLogger('sregym-global').propagate = False # do not propagate to the real root logger ('') + + # set up the logger for log file + root_logger = logging.getLogger('all') + root_logger.setLevel(logging.DEBUG) + root_logger.propagate = False # do not propagate to the real root logger ('') + + timestamp = get_current_datetime_formatted() + # create dir and file + path = f'./logs/sregym_{timestamp}.log' + os.makedirs('./logs', exist_ok=True) + + handler = logging.FileHandler(path) + # add code line and filename and function name + handler.setFormatter(ExhaustInfoFormatter(fmt='%(asctime)s - %(levelname)s - %(name)s - %(message)s - %(filename)s:%(funcName)s:%(lineno)d', datefmt='%Y-%m-%d %H:%M:%S', extra_attributes=["sol", "result", "Full Prompt", "Tool Calls"])) + handler.setLevel(logging.DEBUG) + root_logger.addHandler(handler) + + + std_handler = logging.StreamHandler() + std_handler.setFormatter(ColorFormatter(fmt='%(levelname)s - %(name)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S', style='%')) + std_handler.setLevel(logging.INFO) + root_logger.addHandler(std_handler) + + unify_third_party_loggers() + silent_litellm_loggers() + silent_httpx_loggers() + silent_FastMCP_loggers() + +def silent_paramiko_loggers(): + # make the paramiko logger silent + logging.getLogger('paramiko').setLevel(logging.WARNING) # throttle the log source + +def silent_FastMCP_loggers(): + # make the FastMCP logger silent + logging.getLogger('mcp').setLevel(logging.WARNING) + +def silent_litellm_loggers(): + verbose_proxy_logger = logging.getLogger("LiteLLM Proxy") + verbose_router_logger = logging.getLogger("LiteLLM Router") + verbose_logger = logging.getLogger("LiteLLM") + verbose_proxy_logger.setLevel(logging.WARNING) + verbose_router_logger.setLevel(logging.WARNING) + verbose_logger.setLevel(logging.WARNING) + +def silent_httpx_loggers(): + httpx_logger = logging.getLogger("httpx") + httpx_logger.setLevel(logging.WARNING) + +def unify_third_party_loggers(): + # make the info level third party loggers (e.g. paramiko) have the common formatter + logging.getLogger('') + # get the handler + handlers = logging.getLogger('').handlers + if handlers: + for handler in handlers: + handler.setFormatter(ColorFormatter(fmt='%(levelname)s - %(name)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S', style='%')) + else: + print("No handler found for root logger") + +# silent uvicorn: main.py:96 \ No newline at end of file diff --git a/logger/handler.py b/logger/handler.py new file mode 100644 index 0000000..20aff62 --- /dev/null +++ b/logger/handler.py @@ -0,0 +1,42 @@ +import logging + +class ExhaustInfoFormatter(logging.Formatter): + + def __init__(self, fmt=None, datefmt=None, style='%', extra_attributes=[]): + super().__init__(fmt, datefmt, style) + self.base_format_string = fmt + self.extra_attributes = extra_attributes + + def format(self, record): + # 1. 
Execute parent class format() method to get base formatted log string + # This step handles standard fields like %(asctime)s, %(levelname)s, %(message)s + base_log_message = super().format(record) + + # 2. Find all non-standard (i.e., extra) fields + extra_fields = {key: value for key, value in record.__dict__.items() if key in self.extra_attributes} + + # 3. Build extra fields append string + extra_output = "" + if extra_fields: + # Format extra fields as 'key1=value1, key2=value2, ...' form + extra_parts = [f"{key} = {value}" for key, value in extra_fields.items()] + extra_output = " [" + ", ".join(extra_parts) + "]" + + # 4. Return base message and appended extra string + return base_log_message + extra_output + +class ColorFormatter(logging.Formatter): + def __init__(self, fmt=None, datefmt=None, style='%'): + super().__init__(fmt, datefmt, style) + + def format(self, record): + base_log_message = super().format(record) + if record.levelno == logging.DEBUG: #white + return base_log_message + elif record.levelno == logging.INFO: #blue + return f"\033[94m{base_log_message}\033[0m" + elif record.levelno == logging.WARNING: #yellow + return f"\033[92m{base_log_message}\033[0m" + elif record.levelno == logging.ERROR: #red + return f"\033[95m{base_log_message}\033[0m" + return base_log_message diff --git a/main.py b/main.py new file mode 100644 index 0000000..d56609b --- /dev/null +++ b/main.py @@ -0,0 +1,289 @@ +import argparse +import asyncio +import csv +import logging +import multiprocessing +import os +import sys +import threading +import time +from datetime import datetime + +import uvicorn +from rich.console import Console + +from dashboard.dashboard_app import SREGymDashboardServer +from dashboard.proxy import LogProxy +from logger import init_logger +from mcp_server.configs.load_all_cfg import mcp_server_cfg +from mcp_server.sregym_mcp_server import app as mcp_app +from sregym.agent_launcher import AgentLauncher +from sregym.agent_registry import get_agent, list_agents +from sregym.conductor.conductor import Conductor +from sregym.conductor.conductor_api import request_shutdown, run_api +from sregym.conductor.constants import StartProblemResult + +LAUNCHER = AgentLauncher() + + +def get_current_datetime_formatted(): + now = datetime.now() + formatted_datetime = now.strftime("%m-%d_%H-%M") + return formatted_datetime + + +def driver_loop(conductor: Conductor, problem_filter: str = None, use_external_harness: bool = False): + """ + Deploy each problem and wait for HTTP grading via POST /submit. + Returns a list of flattened dicts with results per problem. + + Args: + conductor: The Conductor instance + problem_filter: Optional problem ID to run. If specified, only this problem will be run. + use_external_harness: If True, inject fault and exit without running evaluation logic. + """ + + async def driver(): + console = Console() + # give the API a moment to bind + await asyncio.sleep(1) + agents_to_start = list_agents() + all_results = [] + for agent_name in agents_to_start.keys(): + console.log(f"Starting agent now: {agent_name}") + conductor.register_agent(agent_name) + all_results_for_agent = [] + + # Get all problem IDs and filter if needed + problem_ids = conductor.problems.get_problem_ids() + if problem_filter: + if problem_filter not in problem_ids: + console.log( + f"⚠️ Problem '{problem_filter}' not found in registry. 
Available problems: {problem_ids}" + ) + sys.exit(1) + problem_ids = [problem_filter] + console.log(f"🎯 Running single problem: {problem_filter}") + + for pid in problem_ids: + console.log(f"\n🔍 Starting problem: {pid}") + + conductor.problem_id = pid + + result = await conductor.start_problem() + if result == StartProblemResult.SKIPPED_KHAOS_REQUIRED: + console.log(f"⏭️ Skipping problem '{pid}': requires Khaos but running on emulated cluster") + continue + + # If using external harness, fault is injected - exit now + if use_external_harness: + console.log(f"✅ Fault injected for problem '{pid}'. Exiting for external harness.") + return [] + + if not use_external_harness: + reg = get_agent(agent_name) + if reg: + await LAUNCHER.ensure_started(reg) + + # Poll until grading completes + while conductor.submission_stage != "done": + await asyncio.sleep(1) + + console.log(f"✅ Completed {pid}: results={conductor.results}") + + snapshot = {"problem_id": pid} + for stage, outcome in conductor.results.items(): + if isinstance(outcome, dict): + for k, v in outcome.items(): + snapshot[f"{stage}.{k}"] = v + else: + snapshot[stage] = outcome + all_results_for_agent.append(snapshot) + + fieldnames = sorted({key for row in all_results_for_agent for key in row.keys()}) + current_date_time = get_current_datetime_formatted() + csv_path = f"{agent_name}_{current_date_time}_arena_{pid}_results.csv" + with open(csv_path, "w", newline="") as csvfile: + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + writer.writeheader() + writer.writerows(all_results_for_agent) + print(f"✅ Problem {pid} for agent {agent_name} complete! Results written to {csv_path}") + entry_for_agent = {agent_name: all_results_for_agent} + all_results.append(entry_for_agent) + + return all_results + + return asyncio.run(driver()) + + +def start_mcp_server_after_api(): + # Small delay so the main API binds first (avoid port races if clients hit MCP immediately) + time.sleep(1.0) + + host = "0.0.0.0" if mcp_server_cfg.expose_server else "127.0.0.1" + port = mcp_server_cfg.mcp_server_port + + config = uvicorn.Config( + app=mcp_app, + host=host, + port=port, + log_level="info", + ) + # IMPORTANT: we're not in the main thread + config.install_signal_handlers = False + + server = uvicorn.Server(config) + # This call blocks *this* thread; it's fine because we're daemonizing the thread + server.run() + + +def _run_driver_and_shutdown(conductor: Conductor, problem_filter: str = None, use_external_harness: bool = False): + """Run the benchmark driver, stash results, then tell the API to exit.""" + results = driver_loop(conductor, problem_filter=problem_filter, use_external_harness=use_external_harness) + setattr(main, "results", results) + # ⬇️ Ask the API server (running in main thread) to stop so we can write CSV + request_shutdown() + + +def run_dashboard_server(): + """Entry point for multiprocessing child: construct Dash in child process.""" + # Silence child process stdout/stderr to prevent output from being printed + import logging + import os + import sys + + # Redirect stdout and stderr to devnull + try: + sys.stdout = open(os.devnull, "w") + sys.stderr = open(os.devnull, "w") + except Exception: + pass + + # Also silence logging output + logging.getLogger("werkzeug").setLevel(logging.ERROR) + logging.getLogger("dash").setLevel(logging.ERROR) + + # Create and run the dashboard server + server = SREGymDashboardServer(host="127.0.0.1", port=11451, debug=False) + server.run(threaded=False) + + +def start_dashboard_process(): + """Start 
the dashboard server in a separate process and return the process object.""" + # Set multiprocessing start method to 'spawn' for better cross-platform compatibility + try: + multiprocessing.set_start_method("spawn", force=True) + except RuntimeError: + # Already set, ignore + pass + + # Start dashboard in a separate process + dashboard_process = None + try: + dashboard_process = multiprocessing.Process( + target=run_dashboard_server, + name="dashboard-server", + daemon=True, # Daemon process will be terminated when main process exits + ) + dashboard_process.start() + # Give dashboard a moment to start up + time.sleep(2) + except Exception as e: + print(f"⚠️ Failed to start dashboard server: {e}", file=sys.stderr) + + return dashboard_process + + +def main(): + # Parse command-line arguments + parser = argparse.ArgumentParser(description="Run SREGym benchmark suite") + parser.add_argument( + "--problem", + type=str, + default=None, + help="Run only a specific problem by its ID (e.g., 'target_port')", + ) + parser.add_argument( + "--use-external-harness", action="store_true", help="For use in external harnesses, deploy the fault and exit." + ) + args = parser.parse_args() + + # set up the logger + init_logger() + + # Start dashboard in a separate process + dashboard_process = None + if not args.use_external_harness: + dashboard_process = start_dashboard_process() + + conductor = Conductor() + + # Start the driver in the background; it will call request_shutdown() when finished + driver_thread = threading.Thread( + target=_run_driver_and_shutdown, + args=(conductor, args.problem, args.use_external_harness), + name="driver", + daemon=True, + ) + driver_thread.start() + + # Start the MCP server in the background (lets the main thread run the Conductor API) + if not args.use_external_harness: # No need for MCP if using external harness + mcp_thread = threading.Thread( + target=start_mcp_server_after_api, + name="mcp-server", + daemon=True, + ) + mcp_thread.start() + + # Start the Conductor HTTP API in the MAIN thread (blocking) + try: + run_api(conductor) + except KeyboardInterrupt: + # If interrupted, still try to shut down cleanly + request_shutdown() + finally: + # Give driver a moment to finish setting results + driver_thread.join(timeout=5) + + # Terminate dashboard process gracefully if it's still running + if dashboard_process is not None and dashboard_process.is_alive() and not args.use_external_harness: + try: + # Send SIGTERM to allow graceful shutdown (triggers _export_on_exit) + dashboard_process.terminate() + # Give dashboard time to export trace data (export can take a few seconds) + dashboard_process.join(timeout=5) + # Force kill only if still alive after graceful shutdown timeout + if dashboard_process.is_alive(): + print("⚠️ Dashboard process did not exit gracefully, forcing termination...", file=sys.stderr) + dashboard_process.kill() + dashboard_process.join(timeout=1) + except Exception as e: + print(f"⚠️ Error terminating dashboard process: {e}", file=sys.stderr) + + # When API shuts down, collect results from driver + results = getattr(main, "results", []) + + if results: + aggregated = {} + for entry in results: + for agent_name, agent_rows in entry.items(): + aggregated.setdefault(agent_name, []).extend(agent_rows) + + for agent_name, agent_results in aggregated.items(): + fieldnames = sorted({key for row in agent_results for key in row.keys()}) + current_date_time = get_current_datetime_formatted() + csv_path = f"{current_date_time}_{agent_name}_ALL_results.csv" + with 
open(csv_path, "w", newline="") as csvfile: + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + writer.writeheader() + writer.writerows(agent_results) + print(f"✅ Benchmark complete! Results for {agent_name} written to {csv_path}") + else: + print("⚠️ No results to write.") + + sys.exit(0) + + +if __name__ == "__main__": + main() diff --git a/mcp_server/__init__.py b/mcp_server/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/mcp_server/configs/__init__.py b/mcp_server/configs/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/mcp_server/configs/kubectl_session_cfg.py b/mcp_server/configs/kubectl_session_cfg.py new file mode 100644 index 0000000..23a53c3 --- /dev/null +++ b/mcp_server/configs/kubectl_session_cfg.py @@ -0,0 +1,14 @@ +from pydantic import BaseModel, Field + + +class KubectlSessionCfg(BaseModel): + """ kubectl tool session config""" + session_cache_size: int = Field( + description="Max size of the session cache", + gt=100, + ) + + session_ttl: int = Field( + description="Time to live after last time session access (in seconds)", + gt=30, + ) diff --git a/mcp_server/configs/kubectl_tool_cfg.py b/mcp_server/configs/kubectl_tool_cfg.py new file mode 100644 index 0000000..195b901 --- /dev/null +++ b/mcp_server/configs/kubectl_tool_cfg.py @@ -0,0 +1,75 @@ +from pydantic import BaseModel, Field, field_validator +from pathlib import Path +import os +import logging + +logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") +logger = logging.getLogger("all.mcp.kubectl_tool_cfg") + +parent_parent_dir = Path(__file__).resolve().parent.parent +output_parent_dir = parent_parent_dir / "data" + + +class KubectlToolCfg(BaseModel): + retry_wait_time: float = Field( + default=60, + description="Seconds to wait between retries.", + gt=0 + ) + + forbid_unsafe_commands: bool = Field( + default=False, + description="Forbid unsafe commands in the rollback tool.", + ) + + verify_dry_run: bool = Field( + default=False, + description="Enable verification of dry run results after real running.", + ) + + # Update "default" with session id if using remote mcp server + # If you see default dir, something went wrong. + output_dir: str = Field( + default=str(output_parent_dir / "default"), + description="Directory to store some data used by kubectl server." + ) + + namespace: str = Field( + default="", + description="Kubernetes namespace to use for the agent.", + ) + + use_rollback_stack: bool = Field( + default=True, + description="Enable rollback stack for the rollback tool.", + ) + + """ Rollback Tool Configuration """ + validate_rollback: bool = Field( + default=False, + description="Enable generation of validation information", + ) + + clear_replicaset: bool = Field( + default=True, + description="Enable clearing of replicaset after rolling back deployment.", + ) # Warning: This part may be harmful to the system. Use with caution. 
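+    # Clarifying note (see RollbackTool._clear_replicasets): when enabled, the rollback
+    # tool waits clear_rs_wait_time seconds after re-applying a Deployment and then
+    # deletes ReplicaSets matching the Deployment's selector whose replica count has
+    # dropped to 0, so stale ReplicaSets do not linger after a rollback.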
+ + clear_rs_wait_time: float = Field( + default=5, + description="Seconds to wait before clearing replicaset.", + ) + + @field_validator("output_dir") + @classmethod + def validate_output_dir(cls, v): + output_dir = v + if not os.path.exists(output_dir): + logger.debug(f"creating output directory {v}") + os.makedirs(output_dir, exist_ok=True) + else: + logger.debug(f"Directory {v} exists already") + + if not os.access(output_dir, os.W_OK): + raise PermissionError(f"Output directory {output_dir} is not writable.") + return output_dir diff --git a/mcp_server/configs/load_all_cfg.py b/mcp_server/configs/load_all_cfg.py new file mode 100644 index 0000000..373f346 --- /dev/null +++ b/mcp_server/configs/load_all_cfg.py @@ -0,0 +1,49 @@ +import os + +from dotenv import load_dotenv + +from mcp_server.configs.kubectl_session_cfg import KubectlSessionCfg +from mcp_server.configs.mcp_server_cfg import McpServerCfg + +load_dotenv() + + +def str_to_bool(s: str) -> bool: + """ + Convert a string to a boolean value. + + True values: 'true', '1', 'yes', 'y', 'on' + False values: 'false', '0', 'no', 'n', 'off' + + Raises: + ValueError: if the string does not represent a boolean. + + Args: + s (str): The string to convert. + + Returns: + bool: The converted boolean value. + """ + if not isinstance(s, str): + raise TypeError("Input must be a string.") + + true_values = {"true", "1", "yes", "y", "on"} + false_values = {"false", "0", "no", "n", "off"} + + s_lower = s.strip().lower() + if s_lower in true_values: + return True + elif s_lower in false_values: + return False + else: + raise ValueError(f"Invalid literal for boolean: '{s}'") + + +mcp_server_cfg = McpServerCfg( + mcp_server_port=int(os.getenv("MCP_SERVER_PORT", "9954")), + expose_server=str_to_bool(os.getenv("EXPOSE_SERVER", "False")), +) + +kubectl_session_cfg = KubectlSessionCfg( + session_cache_size=int(os.getenv("SESSION_CACHE_SIZE", "10000")), session_ttl=int(os.getenv("SESSION_TTL", "600")) +) diff --git a/mcp_server/configs/mcp_server_cfg.py b/mcp_server/configs/mcp_server_cfg.py new file mode 100644 index 0000000..68e6b82 --- /dev/null +++ b/mcp_server/configs/mcp_server_cfg.py @@ -0,0 +1,16 @@ +import os + +from dotenv import load_dotenv +from pydantic import BaseModel, Field + + +class McpServerCfg(BaseModel): + """ mcp server config""" + mcp_server_port: int = Field( + description="port number of mcp server", + gt=0, + ) + + expose_server: bool = Field( + description="If true, will use 0.0.0.0 for arg host otherwise use 127.0.0.0" + ) diff --git a/mcp_server/example.txt b/mcp_server/example.txt new file mode 100644 index 0000000..95d09f2 --- /dev/null +++ b/mcp_server/example.txt @@ -0,0 +1 @@ +hello world \ No newline at end of file diff --git a/mcp_server/jaeger_server.py b/mcp_server/jaeger_server.py new file mode 100644 index 0000000..f385f0c --- /dev/null +++ b/mcp_server/jaeger_server.py @@ -0,0 +1,139 @@ +import logging +import os +from datetime import datetime, timedelta + +from fastmcp import FastMCP + +from mcp_server.utils import ObservabilityClient + +logger = logging.getLogger("all.mcp.jaeger_server") +logger.info("Starting Jaeger MCP Server") +mcp = FastMCP("Jaeger MCP Server") + + +@mcp.tool(name="get_services") +def get_services() -> str: + """Retrieve the list of service names from the Grafana instance. + + Args: + + Returns: + str: String of a list of service names available in Grafana or error information. 
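+        Example return value (illustrative only; actual service names depend on the deployed app):
+            "['frontend', 'cartservice', 'checkoutservice']"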
+ """ + + logger.debug("[ob_mcp] get_services called, getting jaeger services") + jaeger_url = os.environ.get("JAEGER_BASE_URL", None) + if jaeger_url is None: + err_msg = "JAEGER_BASE_URL environment variable is not set!" + logger.error(err_msg) + raise RuntimeError(err_msg) + jaeger_client = ObservabilityClient(jaeger_url) + try: + url = f"{jaeger_url}/api/services" + response = jaeger_client.make_request("GET", url) + logger.debug(f"[ob_mcp] get_services status code: {response.status_code}") + logger.debug(f"[ob_mcp] get_services result: {response}") + logger.debug(f"[ob_mcp] result: {response.json()}") + services = str(response.json()["data"]) + return services if services else "None" + except Exception as e: + err_str = f"[ob_mcp] Error querying get_services: {str(e)}" + logger.error(err_str) + return err_str + + +@mcp.tool(name="get_operations") +def get_operations(service: str) -> str: + """Query available operations for a specific service from the Grafana instance. + + Args: + service (str): The name of the service whose operations should be retrieved. + + Returns: + str: String of a list of operation names associated with the specified service or error information. + """ + + logger.debug("[ob_mcp] get_operations called, getting jaeger operations") + jaeger_url = os.environ.get("JAEGER_BASE_URL", None) + if jaeger_url is None: + err_msg = "JAEGER_BASE_URL environment variable is not set!" + logger.error(err_msg) + raise RuntimeError(err_msg) + jaeger_client = ObservabilityClient(jaeger_url) + try: + url = f"{jaeger_url}/api/operations" + params = {"service": service} + response = jaeger_client.make_request("GET", url, params=params) + logger.debug(f"[ob_mcp] get_operations: {response.status_code}") + operations = str(response.json()["data"]) + return operations if operations else "None" + except Exception as e: + err_str = f"[ob_mcp] Error querying get_operations: {str(e)}" + logger.error(err_str) + return err_str + + +@mcp.tool(name="get_traces") +def get_traces(service: str, last_n_minutes: int) -> str: + """Get Jaeger traces for a given service in the last n minutes. + + Args: + service (str): The name of the service for which to retrieve trace data. + last_n_minutes (int): The time range (in minutes) to look back from the current time. + + Returns: + str: String of Jaeger traces or error information + """ + + logger.debug("[ob_mcp] get_traces called, getting jaeger traces") + jaeger_url = os.environ.get("JAEGER_BASE_URL", None) + if jaeger_url is None: + err_msg = "JAEGER_BASE_URL environment variable is not set!" 
+ logger.error(err_msg) + raise RuntimeError(err_msg) + jaeger_client = ObservabilityClient(jaeger_url) + try: + url = f"{jaeger_url}/api/traces" + start_time = datetime.now() - timedelta(minutes=last_n_minutes) + start_time = int(start_time.timestamp() * 1_000_000) + end_time = int(datetime.now().timestamp() * 1_000_000) + logger.debug(f"[ob_mcp] get_traces start_time: {start_time}, end_time: {end_time}") + params = { + "service": service, + "start": start_time, + "end": end_time, + "limit": 20, + } + response = jaeger_client.make_request("GET", url, params=params) + logger.debug(f"[ob_mcp] get_traces: {response.status_code}") + traces = str(response.json()["data"]) + return traces if traces else "None" + except Exception as e: + err_str = f"[ob_mcp] Error querying get_traces: {str(e)}" + logger.error(err_str) + return err_str + + +@mcp.tool(name="get_dependency_graph") +def get_dependency_graph(last_n_minutes: int = 30) -> str: + """ + Get service dependency graph from Jaeger's native dependencies API. + Args: + last_n_minutes (int): The time range (in minutes) to look back from the current time. + Returns: + str: JSON object representing the dependency graph. + """ + jaeger_url = os.environ.get("JAEGER_BASE_URL") + if not jaeger_url: + raise RuntimeError("JAEGER_BASE_URL environment variable is not set!") + + client = ObservabilityClient(jaeger_url) + end_time = int(datetime.now().timestamp() * 1000) + start_time = int((datetime.now() - timedelta(minutes=last_n_minutes)).timestamp() * 1000) + + url = f"{jaeger_url}/api/dependencies" + params = {"endTs": end_time, "lookback": last_n_minutes * 60 * 1000} + + response = client.make_request("GET", url, params=params) + logger.info(f"[ob_mcp] get_dependency_graph: {response.status_code}") + return str(response.json()) diff --git a/mcp_server/kubectl_mcp_tools.py b/mcp_server/kubectl_mcp_tools.py new file mode 100644 index 0000000..c1e82a6 --- /dev/null +++ b/mcp_server/kubectl_mcp_tools.py @@ -0,0 +1,108 @@ +import logging + +from fastmcp import Context, FastMCP +from yarl import URL + +from clients.stratus.stratus_utils.get_logger import get_logger +from mcp_server.configs.load_all_cfg import kubectl_session_cfg +from mcp_server.kubectl_server_helper.kubectl_tool_set import KubectlToolSet +from mcp_server.kubectl_server_helper.sliding_lru_session_cache import SlidingLRUSessionCache + +logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") +logger = logging.getLogger("all.mcp.kubectl_mcp_tools") + +sessionCache = SlidingLRUSessionCache( + max_size=kubectl_session_cfg.session_cache_size, ttl_seconds=kubectl_session_cfg.session_ttl +) + +logger = get_logger() + +kubectl_mcp = FastMCP("Kubectl MCP Server") +logger.info("Starting Kubectl MCP Server") + + +def extract_session_id(ctx: Context): + """ + Use this function to get the session id of the request + First use custom session id. + """ + ssid = ctx.request_context.request.headers.get("sregym_ssid") + if ssid is None: + str_url = str(ctx.request_context.request.url) + url = URL(str_url) + ssid = url.query.get("session_id") + return ssid + + +def get_tools(session_id: str) -> KubectlToolSet: + """ + Get the tools related with session_id. If no + tools, create a new one for this session. 
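+    Tool sets live in a sliding LRU session cache, so an idle session's
+    tools are evicted after the configured TTL and a fresh KubectlToolSet
+    is created on the next access.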
+ """ + tool = sessionCache.get(session_id) + if tool is not None: + return tool + + logger.debug(f"Creating a new kubectl tool for session {session_id}.") + tool = KubectlToolSet(session_id) + sessionCache[session_id] = tool + return tool + + +@kubectl_mcp.tool() +def exec_kubectl_cmd_safely(cmd: str, ctx: Context) -> str: + """ + Use this function to execute kubectl commands. + Args: + cmd: The command you want to execute in a CLI to + manage a k8s cluster. It should start with "kubectl". + ctx: If you are an agent, you can safely ignore this + argument. + Returns: + The result of trying to execute cmd. + """ + ssid = extract_session_id(ctx) + kubctl_tool = get_tools(ssid) + logger.debug(f'session {ssid} is using tool "exec_kubectl_cmd_safely"; Command: {cmd}.') + result = kubctl_tool.cmd_runner.exec_kubectl_cmd_safely(cmd) + assert isinstance(result, str) + return result + + +@kubectl_mcp.tool() +def rollback_command(ctx: Context) -> str: + """ + Use this function to roll back the last kubectl command + you successfully executed with the "exec_kubectl_cmd_safely" tool. + Args: + ctx: If you are an agent, you can safely ignore this + argument. + Returns: + The result of trying to roll back the last kubectl command. + """ + ssid = extract_session_id(ctx) + kubectl_tool = get_tools(ssid) + logger.debug(f'session {ssid} is using tool "rollback_command".') + result = kubectl_tool.rollback_tool.rollback() + assert isinstance(result, str) + return f"{result}, action_stack: {kubectl_tool.rollback_tool.action_stack}" + + +@kubectl_mcp.tool() +def get_previous_rollbackable_cmd(ctx: Context) -> str: + """ + Use this function to get a list of commands you + previously executed that could be roll-backed. + + Returns: + Text content that shows a list of commands you + previously executed that could be roll-backed. + When you call rollback_command tool multiple times, + you will roll-back previous commands in the order + of the returned list. + """ + ssid = extract_session_id(ctx) + kubctl_tool = get_tools(ssid) + logger.debug(f'session {ssid} is using tool "get_previous_rollbackable_cmd".') + cmds = kubctl_tool.rollback_tool.get_previous_rollbackable_cmds() + return "\n".join([f"{i + 1}. 
{cmd}" for i, cmd in enumerate(cmds)]) diff --git a/mcp_server/kubectl_server_helper/__init__.py b/mcp_server/kubectl_server_helper/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/mcp_server/kubectl_server_helper/action_stack.py b/mcp_server/kubectl_server_helper/action_stack.py new file mode 100644 index 0000000..1b66f8f --- /dev/null +++ b/mcp_server/kubectl_server_helper/action_stack.py @@ -0,0 +1,51 @@ +import logging + +from mcp_server.kubectl_server_helper.rollback_tool import RollbackNode + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + handlers=[ + # logging.FileHandler("action_stack.log"), + logging.StreamHandler() # This will output to console too + ], +) + + +class ActionStack: + def __init__(self): + self.stack = [] + + def push(self, node: RollbackNode): + """Push a new action onto the stack.""" + self.stack.append(node) + logging.info(f"Pushed action onto stack: {node}") + + def pop(self): + """Pop the last action from the stack (for rollback).""" + if self.stack: + logging.info(f"Popped action from stack: {self.stack[-1]}") + + return self.stack.pop() if self.stack else None + + def peek(self): + """View the last action without removing it.""" + return self.stack[-1] if self.stack else None + + def clear(self): + """Clear the entire stack.""" + self.stack = [] + + def __str__(self) -> str: + if not self.stack: + return "ActionStack: [Empty]" + + result = ["ActionStack:"] + for i, node in enumerate(reversed(self.stack)): + index = len(self.stack) - i - 1 + result.append(f" [{index}] {node}") + + return "\n".join(result) + + def __repr__(self) -> str: + return self.__str__() diff --git a/mcp_server/kubectl_server_helper/cmd_category.py b/mcp_server/kubectl_server_helper/cmd_category.py new file mode 100644 index 0000000..1921176 --- /dev/null +++ b/mcp_server/kubectl_server_helper/cmd_category.py @@ -0,0 +1,78 @@ +kubectl_safe_commands = [ + "kubectl annotate", + "kubectl api-resources", + "kubectl api-version", + "kubectl attach", + "kubectl auth", + "kubectl cluster-info", + "kubectl completion", + "kubectl describe", + "kubectl diff", + "kubectl drain", + "kubectl events", + "kubectl explain", + "kubectl expose", + "kubectl get", + "kubectl logs", + "kubectl options", + "kubectl top", + "kubectl version", +] + +kubectl_unsafe_commands = [ + "kubectl apply", + "kubectl autoscale", + "kubectl certificate", + "kubectl config", + "kubectl cordon", + "kubectl cp", + "kubectl create", + "kubectl delete", + # exec likely needs special consideration, since it *could* be interactive if they did exec /bin/bash + "kubectl exec", + "kubectl kustomize", + "kubectl label", + "kubectl patch", + "kubectl plugins", + "kubectl port-forward", + "kubectl proxy", + "kubectl replace", + "kubectl rollout", + "kubectl run", + "kubectl scale", + "kubectl set", + "kubectl uncordon", + "kubectl taint", +] + +# Interactive commands like edit and debug don't work with our agent +kubectl_unsupported_commands = [ + "kubectl debug", + "kubectl edit", + "kubectl wait", + "kubectl proxy", # This will keep running + "kubectl port-forward", # This will keep running + "kubectl cp", # Should not support file based operations +] + +# Commands that support dry-run +kubectl_dry_run_commands = [ + "kubectl annotate", + "kubectl drain", + "kubectl expose", + "kubectl apply", + "kubectl autoscale", + "kubectl cordon", + "kubectl create", + "kubectl delete", + "kubectl label", + "kubectl patch", + "kubectl replace", + "kubectl run", + 
"kubectl scale", + "kubectl set", + "kubectl rollout undo", + "kubectl uncordon", + "kubectl taint", + "kubectl auth reconcile", +] diff --git a/mcp_server/kubectl_server_helper/kubectl.py b/mcp_server/kubectl_server_helper/kubectl.py new file mode 100644 index 0000000..cdac364 --- /dev/null +++ b/mcp_server/kubectl_server_helper/kubectl.py @@ -0,0 +1,183 @@ +"""Interface to K8S controller service.""" + +import logging +import re +import shlex +import subprocess # nosec B404 +from enum import Enum + +import bashlex +from kubernetes import config +from pydantic.dataclasses import dataclass + +from mcp_server.kubectl_server_helper.utils import parse_text + +logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") +logger = logging.getLogger(__name__) + + +class DryRunStatus(Enum): + SUCCESS = "SUCCESS" + NOEFFECT = "NOEFFECT" + ERROR = "ERROR" + + +@dataclass +class DryRunResult: + status: DryRunStatus + description: str + result: list[str] + + +class KubeCtl: + + def __init__(self): + """Initialize the KubeCtl object and load the Kubernetes configuration.""" + config.load_kube_config() + # self.core_v1_api = client.CoreV1Api() + # self.apps_v1_api = client.AppsV1Api() + + @staticmethod + def exec_command(command: str, input_data=None): + """Execute an arbitrary kubectl command.""" + if input_data is not None: + input_data = input_data.encode("utf-8") + try: + out = subprocess.run(command, shell=True, check=True, capture_output=True, input=input_data) # nosec B602 + out.stdout = out.stdout.decode("utf-8") + out.stderr = out.stderr.decode("utf-8") + return out + except subprocess.CalledProcessError as e: + e.stderr = e.stderr.decode("utf-8") + return e + + @staticmethod + def exec_command_result(command: str, input_data=None) -> str: + result = KubeCtl.exec_command(command, input_data) + if result.returncode == 0: + logger.info(f"Command execution:\n{parse_text(result.stdout, 500)}") + return result.stdout + else: + logger.error(f"Error executing kubectl command:\n{result.stderr}") + return f"Error executing kubectl command:\n{result.stderr}" + + @staticmethod + def extract_namespace_from_command(command: str) -> str: + """ + Returns the namespace. + """ + namespace = None + command_parts = list(bashlex.split(command)) + for i, part in enumerate(command_parts): + if part == "-n" or part == "--namespace": + if i + 1 < len(command_parts): + namespace = command_parts[i + 1] + break + elif part.startswith("--namespace="): + namespace = part.split("=")[1] + break + return namespace + + @staticmethod + def insert_flags(command: str, flags=str | list[str]) -> str: + """ + Insert flags into a kubectl command. + Args: + command (str | list[str]): The kubectl command to modify. + flags (str | list[str]): The flags to insert into the command. + Returns: + str | list[str]: The modified kubectl command with the flags inserted. + The type is the same as the input command. 
+ """ + flags_parsed = shlex.join(flags) if isinstance(flags, list) else flags + + position = None + last_word = None + + def traverse_AST(node): + if node.kind == "word": + nonlocal position + nonlocal last_word + if position is None: + if node.word == "--": + position = node.pos + if node.word == "-" and last_word is not None and last_word.word == "-f": + position = last_word.pos + last_word = node + if hasattr(node, "parts"): + for part in node.parts: + traverse_AST(part) + + for parts in bashlex.parse(command): + traverse_AST(parts) + + if position is None: + return command + " " + flags_parsed + else: + position = position[0] + return command[:position] + " " + flags_parsed + " " + command[position:] + + @staticmethod + def dry_run_json_output(command: str, keylist: list[str] | str | None = None) -> DryRunResult: + """ """ + dry_run_arguments = ["--dry-run=server"] + + if isinstance(keylist, list) and len(keylist) != 0: + keylist = list(map(lambda x: f"{{{x}}}", keylist)) + jsonpath = "$".join(keylist) + dry_run_arguments.extend(["-o", f"jsonpath='[[[{jsonpath}]]]'"]) + elif isinstance(keylist, str): + # This case is for kubectl delete, which only supports: + # kubectl delete -o name + dry_run_arguments.extend(["-o", keylist]) + + dry_run_command = KubeCtl.insert_flags(command, dry_run_arguments) + dry_run_result = subprocess.run(dry_run_command, shell=True, capture_output=True, text=True) # nosec B602 + + if dry_run_result.returncode == 0: + if len(dry_run_result.stdout.strip()) == 0: + return DryRunResult( + status=DryRunStatus.NOEFFECT, + description="The dry-run output is empty. Possibly this command won't affect any resources.", + result=[], + ) + + if isinstance(keylist, list) and len(keylist) != 0: + resource = re.search(r"\[\[\[(.*?)\]\]\]", dry_run_result.stdout, re.DOTALL) + if resource is None: + raise RuntimeError("Unhandled dry-run output format.") + resource = resource.group(1).strip() + if resource.count("$") + 1 != len(keylist): + raise RuntimeError(f"Invalid resource format in dry-run output. {resource}") + resources = [r.strip() for r in resource.split("$")] + elif isinstance(keylist, str): + resources = [r.strip() for r in dry_run_result.stdout.split("/")] + if len(resources) != 2: + raise RuntimeError(f"Invalid resource format in dry-run output. {dry_run_result.stdout}") + else: + resources = [dry_run_result.stdout] + + return DryRunResult( + status=DryRunStatus.SUCCESS, + description="Dry run executed successfully.", + result=resources, + ) + else: + if "error: unknown flag: --dry-run" in dry_run_result.stderr: + return DryRunResult( + status=DryRunStatus.NOEFFECT, + description="Dry-run not supported. Possibly it's a safe command.", + result=[], + ) + elif "can't be used with attached containers options" in dry_run_result.stderr: + return DryRunResult( + status=DryRunStatus.ERROR, + description="Interactive command is not supported.", + result=[], + ) + else: + return DryRunResult( + status=DryRunStatus.ERROR, + description=f"Dry-run failed. Potentially it's an invalid command. 
stderr: {parse_text(dry_run_result.stderr, 200)}", + result=[], + ) diff --git a/mcp_server/kubectl_server_helper/kubectl_cmd_runner.py b/mcp_server/kubectl_server_helper/kubectl_cmd_runner.py new file mode 100644 index 0000000..83bc2fc --- /dev/null +++ b/mcp_server/kubectl_server_helper/kubectl_cmd_runner.py @@ -0,0 +1,269 @@ +import hashlib +import logging +import os +import time + +import bashlex + +from mcp_server.configs.kubectl_tool_cfg import KubectlToolCfg +from mcp_server.kubectl_server_helper.cmd_category import kubectl_safe_commands, kubectl_unsupported_commands +from mcp_server.kubectl_server_helper.kubectl import DryRunResult, DryRunStatus, KubeCtl +from mcp_server.kubectl_server_helper.rollback_tool import RollbackCommand, RollbackNode, RollbackTool +from mcp_server.kubectl_server_helper.utils import cleanup_kubernetes_yaml, parse_text + +logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") +logger = logging.getLogger("all.mcp.kubectl_cmd_runner") + + +class KubectlCmdRunner: + def __init__(self, config: KubectlToolCfg, action_stack=None): + self.action_stack = action_stack + self.config = config + + def exec_kubectl_cmd_safely(self, command: str) -> str: + try: + if not command.strip().startswith("kubectl"): + return "Command Rejected: Only kubectl commands are allowed. Please check the command and try again." + + self._check_kubectl_command(command) + + dry_run_result = KubeCtl.dry_run_json_output(command) + + if self.config.forbid_unsafe_commands and not self._is_kubectl_command_safe(command): + return "Command Rejected: Unsafe command detected. Please check the command and try again." + + logger.debug(f"Dry-run result: {dry_run_result.status}, description: {dry_run_result.description}") + + if dry_run_result.status == DryRunStatus.NOEFFECT: + result = self._execute_kubectl_command(command) + elif dry_run_result.status == DryRunStatus.ERROR: + result = dry_run_result.description + + if self.config.verify_dry_run and "Interactive command" not in dry_run_result.description: + # Warning: This is only for testing purposes. It may execute malicious commands. + exception_triggered = False + try: + self._execute_kubectl_command(command) + except Exception as _: # noqa F841 + exception_triggered = True + + if not exception_triggered: + logger.error("Dry-run verification failed (ERROR case)") + + return result + elif dry_run_result.status == DryRunStatus.SUCCESS: + if self.config.use_rollback_stack: + rollback_command = self._gen_rollback_commands(command, dry_run_result) + + if self.config.verify_dry_run: + try: + result = self._execute_kubectl_command(command) + except Exception as e: + logger.error(f"Dry-run verification failed (SUCCESS case): {e}") + raise e + else: + result = self._execute_kubectl_command(command) + + if self.config.use_rollback_stack: + self.action_stack.push(rollback_command) + else: + raise ValueError(f"Unknown dry run status: {dry_run_result.status}") + return parse_text(result) + except ValueError as ve: + logger.error(f"Command Rejected (ValueError): {ve}") + return f"Command Rejected (ValueError): {ve}" + except Exception as exc: + logger.error(f"Command Rejected: {exc}") + return f"Command Rejected: {exc}" + + def _check_kubectl_command(self, command: str) -> None: + # Check interactive subcommands + for c in kubectl_unsupported_commands: + if command.startswith(c): + raise ValueError(f"Interactive command {c} detected. 
Such commands are not supported.") + + tokens = bashlex.parse(command) + has_redirection = False + + def traverse_AST(node): + if node.kind not in ["command", "heredoc", "redirect", "tilde", "word"]: + if "pipe" in node.kind: + raise ValueError("Pipe commands are forbidden") + raise ValueError(f"Unsupported operator kind: {node.kind}") + + if node.kind == "redirect": + if ">" in node.type: + raise ValueError("Write redirection is forbidden.") + nonlocal has_redirection + if "<" in node.type: + has_redirection = True + + parts = 1 if node.kind == "command" else 0 + if hasattr(node, "parts"): + parts += sum(traverse_AST(part) for part in node.parts) + + if parts > 1: + raise ValueError("Compound commands are forbidden.") + + return parts + + # Check unsupported operators + for part in tokens: + traverse_AST(part) + + # Check interactive flags + parts = list(bashlex.split(command)) + for i, part in enumerate(parts): + if part in ["--interactive", "-i", "--tty", "-t", "--stdin", "-it"]: + raise ValueError( + f"Interactive flag detected: {part}. Such commands are not supported. " + f"Try to use the command non-interactively." + ) + if command.startswith("kubectl logs -f"): + raise ValueError( + f"Interactive flag detected: -f. Such commands are not supported. " + f"Try to use the command non-interactively." + ) + + if part in ["-f", "--filename"] and i + 1 < len(parts) and parts[i + 1] == "-": + if not has_redirection: + raise ValueError("Stdin redirected but no input file provided.") + + if part == "--": + break + + def _is_kubectl_command_safe(self, command: str) -> bool: + for c in kubectl_safe_commands: + if command.startswith(c): + return True + return False + + def _execute_kubectl_command(self, command: str): + logger.debug(f"Executing command: {command}") + result = KubeCtl.exec_command(command) + if result.returncode == 0: + output = parse_text(result.stdout, 1000) + logger.debug(f"Kubectl MCP Tool command execution:\n{output}") + return result.stdout + else: + logger.warning(f"Error executing kubectl command:\n{result.stderr}") + raise RuntimeError(f"Error executing kubectl command:\n{result.stderr}") + + def _gen_rollback_commands(self, command: str, dry_run_result: DryRunResult) -> RollbackNode: + """Generate rollback commands based on the dry-run result.""" + + # We should return this before execution, since kubectl delete will remove the resource + return_value = None + full_state_file = None # For rollback validation + + state_dir = os.path.join(self.config.output_dir, "kubectl_states") + os.makedirs(state_dir, exist_ok=True) + + timestamp = int(time.time()) + cmd_hash = hashlib.md5(command.encode(), usedforsecurity=False).hexdigest()[:8] + state_file = os.path.join(state_dir, f"state_{timestamp}_{cmd_hash}.yaml") + + """ Get the rollback information """ + dry_run_stdout = dry_run_result.result[0] + + namespace = KubeCtl.extract_namespace_from_command(command) + if namespace is None: + # Although should be "default" + namespace = self.config.namespace + + # namespace flag + content + nsp_flag_ctnt = f"-n {namespace}" if namespace else "" + + rollback_commands = [] + + if "created (server dry run)" in dry_run_stdout or "exposed (server dry run)" in dry_run_stdout: + result = KubeCtl.dry_run_json_output(command, "name") + rollback_commands = [ + RollbackCommand( + "command", + "kubectl delete {resource_type} {resource_name} {nsp_flag_ctnt}".format( + resource_type=result.result[0], + resource_name=result.result[1], + nsp_flag_ctnt=nsp_flag_ctnt, + ), + ) + ] + elif "deleted (server 
dry run)" in dry_run_stdout: + result = KubeCtl.dry_run_json_output(command, "name") + if result.result[0] == "namespace": + raise RuntimeError("Deleting a namespace is not allowed.") + + rollback_commands = [ + self._store_resource_state( + state_file, + result.result[0], + result.result[1], + namespace, + ) + ] + elif "autoscaled (server dry run)" in dry_run_stdout: + hpa = KubeCtl.dry_run_json_output(command, "name") + result = KubeCtl.dry_run_json_output(command, [".spec.scaleTargetRef.kind", ".metadata.name"]) + rollback_commands = [ + RollbackCommand( + "command", + "kubectl delete {resource_type} {resource_name} {nsp_flag_ctnt}".format( + resource_type=hpa.result[0], + resource_name=hpa.result[1], + nsp_flag_ctnt=nsp_flag_ctnt, + ), + ), + self._store_resource_state( + state_file, + result.result[0], + result.result[1], + namespace, + ), + ] + else: + result = KubeCtl.dry_run_json_output(command, "name") + rollback_commands = [ + self._store_resource_state( + state_file, + result.result[0], + result.result[1], + namespace, + ) + ] + + # Generate validation information + if self.config.validate_rollback: + time.sleep(self.config.retry_wait_time) + full_state_file = os.path.join(state_dir, f"validation_{timestamp}_{cmd_hash}.yaml") + full_state = RollbackTool.get_namespace_state(self.config.namespace) + full_state = cleanup_kubernetes_yaml(full_state) + with open(full_state_file, "w") as f: + f.write(full_state) + + return_value = RollbackNode(action=command, rollback=rollback_commands, cluster_state=full_state_file) + + logger.debug(f"Generated rollback action {rollback_commands} for '{command}'.") + if self.config.validate_rollback: + logger.debug(f"Namespace state stored in: {full_state_file}") + + return return_value + + def _store_resource_state( + self, state_file: str, resource_type: str, resource_name: str, namespace: str | None + ) -> RollbackCommand: + namespace_flag = f"-n {namespace}" if namespace else "" + + if resource_name is not None: + state_cmd = f"kubectl get {resource_type} {resource_name} {namespace_flag} -o yaml" + else: + state_cmd = f"kubectl get {resource_type} {namespace_flag} -o yaml" + + logger.debug(f"Capturing cluster state with: {state_cmd}") + + cluster_state = KubeCtl.exec_command_result(state_cmd) + + with open(state_file, "w") as f: + cleaned_state = cleanup_kubernetes_yaml(cluster_state) + f.write(cleaned_state) + + return RollbackCommand("file", state_file) diff --git a/mcp_server/kubectl_server_helper/kubectl_tool_set.py b/mcp_server/kubectl_server_helper/kubectl_tool_set.py new file mode 100644 index 0000000..2c67c2b --- /dev/null +++ b/mcp_server/kubectl_server_helper/kubectl_tool_set.py @@ -0,0 +1,18 @@ +from mcp_server.configs.kubectl_tool_cfg import KubectlToolCfg, output_parent_dir +from mcp_server.kubectl_server_helper.action_stack import ActionStack +from mcp_server.kubectl_server_helper.kubectl_cmd_runner import KubectlCmdRunner +from mcp_server.kubectl_server_helper.rollback_tool import RollbackTool + + +class KubectlToolSet: + def __init__(self, session_id: str): + self.ssid = session_id + + self.config = KubectlToolCfg(output_dir=str(output_parent_dir / self.ssid)) + + self.action_stack = None + if self.config.use_rollback_stack: + self.action_stack = ActionStack() + + self.cmd_runner = KubectlCmdRunner(self.config, self.action_stack) + self.rollback_tool = RollbackTool(self.config, self.action_stack) diff --git a/mcp_server/kubectl_server_helper/rollback_tool.py b/mcp_server/kubectl_server_helper/rollback_tool.py new file mode 100644 
index 0000000..3ff45fb --- /dev/null +++ b/mcp_server/kubectl_server_helper/rollback_tool.py @@ -0,0 +1,299 @@ +import logging +import os +import tempfile +import time +import traceback +from typing import Optional + +import yaml +from pydantic.dataclasses import dataclass + +from mcp_server.configs.kubectl_tool_cfg import KubectlToolCfg +from mcp_server.kubectl_server_helper.kubectl import KubeCtl +from mcp_server.kubectl_server_helper.utils import cleanup_kubernetes_yaml, parse_text + +logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") +logger = logging.getLogger(__name__) + + +@dataclass +class RollbackCommand: + command_type: str + content: str + + +@dataclass +class RollbackNode: + action: str + rollback: list[RollbackCommand] + cluster_state: str | None = None + + +class RollbackTool: + """Tool to rollback the last agent action by popping from the stack.""" + + def __init__(self, config: KubectlToolCfg, action_stack): + self.action_stack = action_stack + self.config = config + + def _parse_state_source(self, state_source: str) -> str: + yaml_content = "" + if os.path.exists(state_source) and os.path.isfile(state_source): + logger.info(f"Reading cluster state from file: {state_source}") + try: + with open(state_source, "r") as f: + yaml_content = f.read() + except Exception as e: + error_msg = f"Failed to read state file: {e}" + logger.error(error_msg) + return error_msg + else: + # if state_source is direct YAML content + yaml_content = state_source + + return yaml_content + + def _restore_cluster_state(self, state_source: str) -> str: + """Restore cluster state from a saved YAML representation or file path.""" + # Check if state_source is a file path + yaml_content = self._parse_state_source(state_source) + + # Identify the type of resources + try: + resources = list(yaml.safe_load_all(yaml_content)) + except Exception as e: + logger.error(f"Failed to parse YAML: {e}") + return self._apply_yaml_directly(yaml_content) + + if len(resources) == 0: + return "No resources found in state YAML" + + # TODO: rethink if the resources have dependencies + # we need to apply resources in the correct order + return self._apply_resources_in_order(resources, yaml_content) + + def _apply_resources_in_order(self, resources, yaml_content): + """Apply resources in the correct order respecting dependencies.""" + # 1. First identify and apply any CustomResourceDefinitions + crd_resources = [] + regular_resources = [] + + for resource in resources: + if not isinstance(resource, dict) or "kind" not in resource: + continue + + if resource["kind"] == "CustomResourceDefinition": + crd_resources.append(resource) + else: + regular_resources.append(resource) + + # Apply CRDs first if any + if crd_resources: + logger.info("Applying CustomResourceDefinitions first...") + crd_yaml = "" + for crd in crd_resources: + crd_yaml += yaml.dump(crd) + "\n---\n" + + self._apply_yaml_directly(crd_yaml) + # Wait for CRDs to be established + time.sleep(5) + + # TODO: recosider this dependency order + # 2. 
Apply resources in dependency order + # A more advanced implementation would build a dependency graph + first_tier = ["Namespace", "ConfigMap", "Secret", "ServiceAccount", "Role", "RoleBinding"] + second_tier = ["Service", "PersistentVolumeClaim", "PersistentVolume"] + third_tier = ["DaemonSet", "Job", "CronJob"] + deployment_tier = ["Deployment", "StatefulSet"] + + for tier in [first_tier, second_tier, third_tier, deployment_tier]: + tier_resources = [r for r in regular_resources if r.get("kind") in tier] + if tier_resources: + if tier == deployment_tier: + for resource in tier_resources: + self._apply_yaml_deployment(resource) + + if self.config.clear_replicaset: + time.sleep(self.config.clear_rs_wait_time) + for resource in tier_resources: + self._clear_replicasets(resource) + else: + tier_yaml = "" + for resource in tier_resources: + tier_yaml += yaml.dump(resource) + "\n---\n" + + if tier_yaml: + logger.info(f"Applying {tier} resources...") + self._apply_yaml_directly(tier_yaml) + + remaining = [ + r for r in regular_resources if r.get("kind") not in first_tier + second_tier + third_tier + deployment_tier + ] + if remaining: + remaining_yaml = "" + for resource in remaining: + remaining_yaml += yaml.dump(resource) + "\n---\n" + + if remaining_yaml: + logger.info("Applying remaining resources...") + return self._apply_yaml_directly(remaining_yaml) + + return "Cluster state restored successfully" + + def _apply_yaml_deployment(self, yaml_content): + # TODO improve this using patch + strategy = yaml_content.get("spec", {}).get("strategy", {}) + yaml_content["spec"]["strategy"] = {"type": "Recreate"} + self._apply_yaml_directly(yaml.dump(yaml_content)) + yaml_content["spec"]["strategy"] = strategy + self._apply_yaml_directly(yaml.dump(yaml_content)) + + def _clear_replicasets(self, yaml_content): + namespace = yaml_content.get("metadata", {}).get("namespace", "") + matchlabels = yaml_content.get("spec", {}).get("selector", {}).get("matchLabels", {}) + selector = ",".join([f"{k}={v}" for k, v in matchlabels.items()]) + + rs_selector = ( + f"kubectl get rs -n {namespace} -l {selector} -o jsonpath=" + "'{.items[?(@.status.replicas==0)].metadata.name}'" + ) + + # Actually we can delete all the replica sets here + # But here is the reason why we do like this: + # For the new replica set, we can just preserve it. + # So the 10s we wait is also useful for it being ready. + # A corner case is that, the new replica set is scaled to 0. + # It will be automatically recreated by K8s, and immediately be ready. + + rs_list = KubeCtl.exec_command(rs_selector) + if rs_list.returncode == 0: + rs_list = rs_list.stdout.strip() + if rs_list == "": + logger.info("No ReplicaSets to clear.") + return + delete_cmd = f"kubectl delete rs {rs_list} -n {namespace}" + result = KubeCtl.exec_command(delete_cmd) + if result.returncode == 0: + logger.info(f"Deleted ReplicaSets: [{rs_list}]") + else: + logger.error(f"Failed to delete ReplicaSets [{rs_list}]. Stderr: {result.stderr}") + else: + logger.error(f"Failed to get ReplicaSets. 
Stderr: {rs_list.stderr}") + + def _apply_yaml_directly(self, yaml_content): + """Helper method to apply YAML directly.""" + if not yaml_content.strip(): + return "No YAML content to apply" + + with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as tmp: + tmp.write(yaml_content) + tmp_path = tmp.name + + try: + restore_cmd = f"kubectl apply -f {tmp_path}" + result = KubeCtl.exec_command_result(restore_cmd) + logger.info(f"Applied YAML: {result}") + return result + except Exception as e: + error_msg = f"Failed to apply YAML: {e}" + logger.error(error_msg) + return error_msg + finally: + try: + os.remove(tmp_path) + except Exception as e: + logger.warning(f"Failed to remove temporary file {tmp_path}: {e}") + + @staticmethod + def get_namespace_state(namespace: str | None) -> str: + """Capture the current state of all resources in the cluster.""" + if namespace is None or namespace == "": + all_namespace_flag = "--all-namespaces" + else: + all_namespace_flag = f"-n {namespace}" + all_resources_command = f"kubectl get all -o yaml {all_namespace_flag}" + return KubeCtl.exec_command_result(all_resources_command) + + def compare_states(self, current_state: str, previous_state: str) -> str: + import difflib + + result = difflib.unified_diff( + previous_state.splitlines(keepends=True), + current_state.splitlines(keepends=True), + fromfile="previous_state", + tofile="current_state", + ) + return "".join(result) + + def get_previous_rollbackable_cmds(self) -> list[str]: + return [action.action for action in self.action_stack.stack][::-1] + + def rollback(self) -> str: + if not hasattr(self, "action_stack") or self.action_stack is None: + return "Warning: Action Stack disabled. Stop rolling back." + + try: + if hasattr(self.action_stack, "is_empty") and self.action_stack.is_empty(): + return "No more actions to rollback." + last_action: Optional[RollbackNode] = self.action_stack.pop() + + if last_action is not None: + result = [] + for rollback in last_action.rollback: + if rollback.command_type == "command": + one_step_result = KubeCtl.exec_command(rollback.content) + + if one_step_result.returncode == 0: + output = parse_text(one_step_result.stdout, 1000) + result.append(f"Rollback command: {rollback.content}; " f"Execution result: {output}") + logger.info(result[-1]) + else: + raise RuntimeError(f"Error executing rollback command: {one_step_result.stderr}") + + elif rollback.command_type == "file": + one_step_result = self._restore_cluster_state(rollback.content) + result.append( + f"Try to restore cluster state with file {rollback.content}. 
" f"Result: {one_step_result}" + ) + logger.info(result[-1]) + else: + raise ValueError(f"Unknown rollback type: {rollback.type}") + + rollback_process_desc = ( + f"Rolled back the previous command: {last_action.action}.\n" + f"-------------------Rollback Process:-------------------\n" + ) + for i, one_step_txt in enumerate(result): + rollback_process_desc += f"\nStep {i + 1}:\n{one_step_txt}\n" + rollback_process_desc += f"-------------------End of Rollback Process:-------------------\n" + + if self.config.validate_rollback: + time.sleep(self.config.retry_wait_time) + current_state = RollbackTool.get_namespace_state(self.config.namespace) + current_state = cleanup_kubernetes_yaml(current_state) + last_state = self._parse_state_source(last_action.cluster_state) + diff = self.compare_states(current_state, last_state) + raw_filename = os.path.basename(last_action.cluster_state).replace("validation_", "") + diff_file = os.path.join( + self.config.output_dir, + "rollback_validation", + f"rollback_diff_{raw_filename}", + ) + os.makedirs(os.path.dirname(diff_file), exist_ok=True) + with open(diff_file, "w") as f: + f.write(diff) + ref_file = os.path.join( + self.config.output_dir, + "rollback_validation", + f"rollback_ref_{raw_filename}", + ) + with open(ref_file, "w") as f: + f.write(current_state) + + return rollback_process_desc + return "No more actions to rollback." + + except Exception as e: + tb = "".join(traceback.format_exception(type(e), e, e.__traceback__)) + logger.error(f"Error traceback: {tb}") + return f"Error during rollback: {str(e)}" diff --git a/mcp_server/kubectl_server_helper/sliding_lru_session_cache.py b/mcp_server/kubectl_server_helper/sliding_lru_session_cache.py new file mode 100644 index 0000000..15717d9 --- /dev/null +++ b/mcp_server/kubectl_server_helper/sliding_lru_session_cache.py @@ -0,0 +1,121 @@ +import logging +import os +import shutil +import threading +import time +from collections import OrderedDict +from pathlib import Path + +from mcp_server.kubectl_server_helper.kubectl_tool_set import KubectlToolSet + +logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") +logger = logging.getLogger(__name__) +mcp_data_dir = Path(__file__).parent.parent / "data" + + +class SlidingLRUSessionCache: + """ + This is a container that used to hold session data of kubectl mcp tools. + Features: + Fixed max_size: evicts least recently used items + Sliding TTL: expiration timer is refreshed on every access + Thread-safe (via threading.Lock) + Automatic cleanup on access and insertion + """ + + def __init__(self, max_size: int, ttl_seconds: int | float): + self.max_size = max_size + self.ttl = ttl_seconds + self.lock = threading.RLock() + self.cache = OrderedDict() # key -> (value, last_access_time) + + def __getitem__(self, key) -> KubectlToolSet: + with self.lock: + self.clean_expired() + if key not in self.cache: + raise KeyError(key) + + value, last_access = self.cache[key] + # Refresh TTL (sliding expiration) + logger.info(f"Accessing item with key {key}. 
TTL is refreshed.") + now = time.time() + self.cache.move_to_end(key) + self.cache[key] = (value, now) + return value + + def __setitem__(self, key, value: KubectlToolSet): + with self.lock: + now = time.time() + if key in self.cache: + self.cache.move_to_end(key) + self.cache[key] = (value, now) + + if self.__len__() > self.max_size: + to_del = next(iter(self.cache)) + logger.info(f"Clean up LRU item with key {to_del} as maxsize is reached.") + self.__delitem__(to_del) # remove LRU + + def __delitem__(self, key): + with self.lock: + tool, last_access = self.cache[key] + self._clean_up_tool(key, tool) + del self.cache[key] + + # length of unexpired ones + def __len__(self): + with self.lock: + self.clean_expired() + return len(self.cache) + + def clean_expired(self): + with self.lock: + now = time.time() + to_dels = [] + for key in self.cache: + value, last_access = self.cache[key] + if now - last_access >= self.ttl: + to_dels.append(key) + else: + # all the items behind the first unexpired shouldn't be expired either. + break + + for to_del in to_dels: + logger.info(f"Clean up expired items with key {to_del}.") + self.__delitem__(to_del) + + def get(self, key, default=None): + """ + Use this method to get tools. If the returned + result is None, it means the key doesn't exist. + """ + try: + return self[key] + except KeyError: + return default + + def set(self, key, value): + self[key] = value + + def size(self): + return len(self) + + def _clean_up_tool(self, key, tool: KubectlToolSet): + """ + Clean up the directory created for the tool + related with the session {key} + """ + opt_dir = Path(tool.config.output_dir) + if os.path.exists(opt_dir) and os.path.isdir(opt_dir): + if opt_dir.parent == mcp_data_dir: + logger.info(f"Tool file directory {opt_dir} of session {key} will be deleted.") + shutil.rmtree(opt_dir) + else: + logger.info( + f"Tool file directory {opt_dir} of session {key} is not the default one. " + f"For safety issues, please clean it up by yourself." + ) + else: + logger.info( + f"Tool file directory {opt_dir} of session {key} does not exist when trying to clean it or " + f"path {opt_dir} is not a valid directory." + ) diff --git a/mcp_server/kubectl_server_helper/utils.py b/mcp_server/kubectl_server_helper/utils.py new file mode 100644 index 0000000..43990af --- /dev/null +++ b/mcp_server/kubectl_server_helper/utils.py @@ -0,0 +1,50 @@ +import logging +import yaml + +logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") +logger = logging.getLogger(__name__) + + +def parse_text(text, max_length=10000): + """ + Parse and truncate text if it's too long. + + Args: + text (str): The text to parse + + Returns: + str: The parsed text + """ + # Truncate if needed to avoid token limits + if len(text) > max_length: + return text[:max_length] + "... 
[truncated]" + return text + + +def cleanup_kubernetes_yaml(cluster_state: str) -> str: + object = None + + try: + object = list(yaml.safe_load_all(cluster_state)) + except Exception as e: + logger.error(f"Yaml cleaner: Failed to parse YAML: {e}") + return "" + + def recursive_remove(obj): + if isinstance(obj, dict): + obj.get("metadata", {}).pop("resourceVersion", None) + obj.get("metadata", {}).get("annotations", {}).pop("kubectl.kubernetes.io/last-applied-configuration", None) + obj.pop("uid", None) + for k, v in obj.items(): + if k == "ownerReferences": + continue + # Should not modify the last-applied-configuration string + if isinstance(v, dict) or isinstance(v, list): + recursive_remove(v) + elif isinstance(obj, list): + for item in obj: + recursive_remove(item) + + recursive_remove(object) + + return yaml.dump_all(object) diff --git a/mcp_server/prometheus_server.py b/mcp_server/prometheus_server.py new file mode 100644 index 0000000..1fbdbb3 --- /dev/null +++ b/mcp_server/prometheus_server.py @@ -0,0 +1,45 @@ +import logging +import os + +from fastmcp import FastMCP + +from clients.stratus.stratus_utils.get_logger import get_logger +from mcp_server.utils import ObservabilityClient + +logger = get_logger() +logger.info("Starting Prometheus MCP Server") + +mcp = FastMCP("Prometheus MCP Server") + + +@mcp.tool(name="get_metrics") +def get_metrics(query: str) -> str: + """Query real-time metrics data from the Prometheus instance. + + Args: + query (str): A Prometheus Query Language (PromQL) expression used to fetch metric values. + + Returns: + str: String of metric results, including timestamps, values, and labels or error information. + """ + + logger.info("[prom_mcp] get_metrics called, getting prometheus metrics") + prometheus_port = os.environ.get("PROMETHEUS_PORT", None) + if prometheus_port is None: + err_msg = "PROMETHEUS_PORT environment variable is not set!" 
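+        # Log and raise so the tool call fails fast with a clear configuration error.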
+ logger.error(err_msg) + raise RuntimeError(err_msg) + prometheus_url = "http://localhost:" + os.environ["PROMETHEUS_PORT"] + observability_client = ObservabilityClient(prometheus_url) + try: + url = f"{prometheus_url}/api/v1/query" + param = {"query": query} + response = observability_client.make_request("GET", url, params=param) + logger.info(f"[prom_mcp] get_metrics status code: {response.status_code}") + logger.info(f"[prom_mcp] get_metrics result: {response}") + metrics = str(response.json()["data"]) + return metrics if metrics else "None" + except Exception as e: + err_str = f"[prom_mcp] Error querying get_metrics: {str(e)}" + logger.error(err_str) + return err_str diff --git a/mcp_server/sregym_mcp_server.py b/mcp_server/sregym_mcp_server.py new file mode 100644 index 0000000..4e3dc55 --- /dev/null +++ b/mcp_server/sregym_mcp_server.py @@ -0,0 +1,30 @@ +import logging + +import uvicorn +from fastmcp.server.http import create_sse_app +from starlette.applications import Starlette +from starlette.routing import Mount + +from mcp_server.configs.load_all_cfg import mcp_server_cfg +from mcp_server.jaeger_server import mcp as observability_mcp +from mcp_server.kubectl_mcp_tools import kubectl_mcp +from mcp_server.prometheus_server import mcp as prometheus_mcp +from mcp_server.submit_server import mcp as submit_mcp + +logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") +logger = logging.getLogger(__name__) + +app = Starlette( + routes=[ + Mount("/kubectl_mcp_tools", app=create_sse_app(kubectl_mcp, "/messages/", "/sse")), + Mount("/jaeger", app=create_sse_app(observability_mcp, "/messages/", "/sse")), + Mount("/prometheus", app=create_sse_app(prometheus_mcp, "/messages/", "/sse")), + Mount("/submit", app=create_sse_app(submit_mcp, "/messages/", "/sse")), + ] +) + +if __name__ == "__main__": + port = mcp_server_cfg.mcp_server_port + host = "0.0.0.0" if mcp_server_cfg.expose_server else "127.0.0.1" + logger.info("Starting SREGym MCP Server") + uvicorn.run(app, host=host, port=port) diff --git a/mcp_server/submit_server.py b/mcp_server/submit_server.py new file mode 100644 index 0000000..cdeb344 --- /dev/null +++ b/mcp_server/submit_server.py @@ -0,0 +1,129 @@ +import asyncio +import logging +import traceback + +import requests +from fastmcp import FastMCP +from kubernetes import client, config + +from clients.stratus.configs.langgraph_tool_configs import LanggraphToolConfig +from clients.stratus.stratus_utils.get_logger import get_logger +from clients.stratus.tools.localization import get_resource_uid + +logger = get_logger() +logger.info("Starting Submission MCP Server") + +langgraph_tool_config = LanggraphToolConfig() + +mcp = FastMCP("Submission MCP Server") + + +@mcp.tool(name="submit") +def submit(ans: str) -> dict[str, str]: + """Submit task result to benchmark + + Args: + ans (str): task result that the agent submits + + Returns: + dict[str]: http response code and response text of benchmark submission server + """ + + logger.info("[submit_mcp] submit mcp called") + # FIXME: reference url from config file, remove hard coding + url = langgraph_tool_config.benchmark_submit_url + headers = {"Content-Type": "application/json"} + # Match curl behavior: send "\"yes\"" when ans is "yes" + payload = {"solution": f"{ans}"} + + try: + response = requests.post(url, json=payload, headers=headers) + logger.info(f"[submit_mcp] Response status: {response.status_code}, text: {response.text}") + return {"status": str(response.status_code), "text": 
str(response.text)} + + except Exception as e: + logger.error(f"[submit_mcp] HTTP submission failed: {e}") + return {"status": "N/A", "text": f"[submit_mcp] HTTP submission failed: {e}"} + + +@mcp.tool(name="localization") +async def localization( + resource_type: str, + resource_name: str, + namespace: str, +) -> dict[str, str]: + """Retrieve the UID of a specified Kubernetes resource.""" + config.load_kube_config() + try: + cmd = [ + "kubectl", + "get", + resource_type, + resource_name, + "-n", + namespace, + "-o", + "jsonpath={.metadata.uid}", + ] + logger.info(f"[localization_mcp] Running command: {' '.join(cmd)}") + if resource_type.lower() == "pod": + api = client.CoreV1Api() + obj = api.read_namespaced_pod(name=resource_name, namespace=namespace) + elif resource_type.lower() == "service": + api = client.CoreV1Api() + obj = api.read_namespaced_service(name=resource_name, namespace=namespace) + elif resource_type.lower() == "deployment": + api = client.AppsV1Api() + obj = api.read_namespaced_deployment(name=resource_name, namespace=namespace) + elif resource_type.lower() == "statefulset": + api = client.AppsV1Api() + obj = api.read_namespaced_stateful_set(name=resource_name, namespace=namespace) + elif resource_type.lower() == "persistentvolumeclaim": + api = client.CoreV1Api() + obj = api.read_namespaced_persistent_volume_claim(name=resource_name, namespace=namespace) + elif resource_type.lower() == "persistentvolume": + api = client.CoreV1Api() + obj = api.read_persistent_volume(name=resource_name) + elif resource_type.lower() == "configmap": + api = client.CoreV1Api() + obj = api.read_namespaced_config_map(name=resource_name, namespace=namespace) + elif resource_type.lower() == "replicaset": + api = client.AppsV1Api() + obj = api.read_namespaced_replica_set(name=resource_name, namespace=namespace) + elif resource_type.lower() == "memoryquota": + api = client.CoreV1Api() + obj = api.read_namespaced_resource_quota(name=resource_name, namespace=namespace) + elif resource_type.lower() == "ingress": + api = client.NetworkingV1Api() + obj = api.read_namespaced_ingress(name=resource_name, namespace=namespace) + elif resource_type.lower() == "networkpolicy": + api = client.NetworkingV1Api() + obj = api.read_namespaced_network_policy(name=resource_name, namespace=namespace) + elif resource_type.lower() == "tidbcluster": + api = client.CustomObjectsApi() + obj = api.read_namespaced_custom_object( + group="pingcap.com", version="v1alpha1", namespace=namespace, plural="tidbclusters", name=resource_name + ) + elif resource_type.lower() == "job": + api = client.BatchV1Api() + obj = api.read_namespaced_job(name=resource_name, namespace=namespace) + elif resource_type.lower() == "daemonset": + api = client.AppsV1Api() + obj = api.read_namespaced_daemon_set(name=resource_name, namespace=namespace) + elif resource_type.lower() == "clusterrole": + api = client.RbacAuthorizationV1Api() + obj = api.read_cluster_role(name=resource_name) + elif resource_type.lower() == "clusterrolebinding": + api = client.RbacAuthorizationV1Api() + obj = api.read_cluster_role_binding(name=resource_name) + else: + err_msg = f"Unsupported resource type: {resource_type}" + logger.error(f"[localization_mcp] {err_msg}") + return {"uid": f"Error: {err_msg}"} + uid = obj.metadata.uid + logger.info(f"[localization_mcp] Retrieved UID using Kubernetes client: {uid}") + return {"uid": uid} + except Exception as e: + logger.error(f"[localization_mcp] Exception occurred: {e}") + logger.error(traceback.format_exc()) + return {"uid": 
f"Exception: {e}"} diff --git a/mcp_server/test_client.py b/mcp_server/test_client.py new file mode 100644 index 0000000..d62b971 --- /dev/null +++ b/mcp_server/test_client.py @@ -0,0 +1,146 @@ +"""Official example mcp client from anthropic, source: https://gist.github.com/zckly/f3f28ea731e096e53b39b47bf0a2d4b1""" + +import asyncio +import json +import sys +from contextlib import AsyncExitStack +from typing import Optional + +from anthropic import Anthropic +from dotenv import load_dotenv +from init_backend import get_llm_backend_for_tools +from mcp import ClientSession, StdioServerParameters +from mcp.client.stdio import stdio_client + +load_dotenv() # load environment variables from .env + + +class MCPClient: + def __init__(self): + # Initialize session and client objects + self.session: Optional[ClientSession] = None + self.exit_stack = AsyncExitStack() + self.anthropic = Anthropic() + + async def connect_to_server(self, server_script_path: str): + """Connect to an MCP server + + Args: + server_script_path: Path to the server script (.py or .js) + """ + is_python = server_script_path.endswith(".py") + is_js = server_script_path.endswith(".js") + if not (is_python or is_js): + raise ValueError("Server script must be a .py or .js file") + + command = sys.executable if is_python else "node" # Uses the current Python interpreter from the activated venv + server_params = StdioServerParameters(command=command, args=[server_script_path], env=None) + + stdio_transport = await self.exit_stack.enter_async_context(stdio_client(server_params)) + self.stdio, self.write = stdio_transport + self.session = await self.exit_stack.enter_async_context(ClientSession(self.stdio, self.write)) + + await self.session.initialize() + + # List available tools + response = await self.session.list_tools() + tools = response.tools + print("\nConnected to server with tools:", [tool.name for tool in tools]) + + async def process_query(self, query: str) -> str: + """Process a query using Claude and available tools""" + messages = query + + response = await self.session.list_tools() + # to make tool calling work on openai. + available_tools = [] + tool_names = [] + for tool in response.tools: + # FIXME: this is just to make this demo work, see below + tool_names.append(tool.name) + for param in tool.inputSchema["properties"].values(): + param["description"] = param["title"] + print(f"tool input schema to openai: {tool.inputSchema}") + # FIXME: When building MCP server tools, compile such object within the definition + # so that the client can use it directly. 
+ available_tools.append( + { + "type": "function", + "function": { + "name": tool.name, + "description": tool.description, + "parameters": tool.inputSchema, + }, + } + ) + + llm = get_llm_backend_for_tools() + finish_reason, response_message = llm.inference( + system_prompt="You are a helpful assistant", + input=messages, + tools=available_tools, + ) + + # Process response and handle tool calls + tool_results = [] + final_text = [] + + if finish_reason == "tool_calls" or finish_reason in tool_names: + tool_name = finish_reason + tool_args = response_message + else: + tool_name = None + tool_args = None + + print(f"tool {tool_name}, args {tool_args}") + # Execute tool call + if finish_reason == "stop": + final_text.append(response_message.content) + else: + result = await self.session.call_tool(tool_name, tool_args) + tool_results.append({"call": tool_name, "result": result}) + print(f"tool result: {result}") + final_text.append(f"[Calling tool {tool_name} with args {tool_args}]") + + return "\n".join(final_text) + + async def chat_loop(self): + """Run an interactive chat loop""" + print("\nMCP Client Started!") + print("Type your queries or 'quit' to exit.") + + while True: + try: + query = input("\nQuery: ").strip() + + if query.lower() == "quit": + break + + response = await self.process_query(query) + print("\n" + response) + + except Exception as e: + print(f"\nError: {str(e)}") + + async def cleanup(self): + """Clean up resources""" + await self.exit_stack.aclose() + + +async def main(): + if len(sys.argv) < 2: + print("Usage: python client.py ") + sys.exit(1) + + client = MCPClient() + try: + await client.connect_to_server(sys.argv[1]) + await client.chat_loop() + finally: + await client.cleanup() + + +if __name__ == "__main__": + import sys + + asyncio.run(main()) diff --git a/mcp_server/test_server.py b/mcp_server/test_server.py new file mode 100644 index 0000000..71535e1 --- /dev/null +++ b/mcp_server/test_server.py @@ -0,0 +1,45 @@ +import logging +from typing import Any + +import httpx +import mcp.types as types +from mcp.server.fastmcp import FastMCP +from mcp.server.fastmcp.prompts import base +from pydantic import AnyUrl + +logger = logging.getLogger("Example MCP Server") +logger.info("Starting Example MCP Server") + +mcp = FastMCP("Example MCP Server") + + +@mcp.resource("resource://example-txt") +def get_example_txt() -> Any: + logger.debug("get_example_txt called") + with open("./mcp_server/example.txt", "r") as f: + return f.read() + + +@mcp.resource("resource://example-txt/{string}") +def get_example_txt_with_str(string: str) -> Any: + logger.debug("get_example_txt called") + with open("./mcp_server/example.txt", "r") as f: + return f"inserted str: {string}, example txt content: {f.read()}" + + +@mcp.tool() +def surround(character: str, main_body: str) -> str: + logger.debug("surround called") + return f"{character}{main_body}{character}" + + +@mcp.prompt(name="summarize_example_text") +def summarize_example_text(text: str) -> list[base.Message]: + return [ + base.UserMessage("Please summarize this text"), + base.UserMessage(text), + ] + + +if __name__ == "__main__": + mcp.run() diff --git a/mcp_server/utils.py b/mcp_server/utils.py new file mode 100644 index 0000000..64bcfba --- /dev/null +++ b/mcp_server/utils.py @@ -0,0 +1,73 @@ +import logging +import os +from typing import Any, Dict, Optional + +import requests +from requests.adapters import HTTPAdapter +from urllib3.util.retry import Retry + +mcp_server_port = os.getenv("MCP_SERVER_PORT", "8001") + 
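+# Shared logging setup plus HTTP timeout/retry knobs (overridable via environment
+# variables) used by ObservabilityClient below.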
+logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") +logger = logging.getLogger("all.mcp.utils") + +REQUEST_TIMEOUT = int(os.getenv("REQUEST_TIMEOUT", 120)) +RETRY_TOTAL = int(os.getenv("RETRY_TOTAL", 3)) +RETRY_BACKOFF_FACTOR = float(os.getenv("RETRY_BACKOFF_FACWTOR", 0.3)) + + +class ObservabilityClient: + observability_server_url: Optional[str] = None + jaeger_service_account_token: Optional[str] = None + headers: Optional[Dict] = None + session: Optional[Any] = None + + def __init__(self, observability_url: Optional[str] = None): + # FIXME: this is always None because we don't use this env var anymore + # refactor this logic. + self.observability_server_url = os.environ.get("JAEGER_URL", None) + if self.observability_server_url is None: + if observability_url is not None: + self.observability_server_url = observability_url + else: + self.observability_server_url = f"http://localhost:{mcp_server_port}" + + logger.debug(f"observability endpoint is: {self.observability_server_url}") + + # This is almost always NOP because we don't have such setting + self.jaeger_service_account_token = os.environ.get("GRAFANA_SERVICE_ACCOUNT_TOKEN", "NOP") + + logger.debug( + "url: {g}, token: {t}".format(g=self.observability_server_url, t=self.jaeger_service_account_token) + ) + + self.headers = { + "Content-Type": "application/json", + "Authorization": f"Bearer {self.jaeger_service_account_token}", + } + self.session = self.create_retrying_session() + + def create_retrying_session(self) -> requests.Session: + session = requests.Session() + + retries = Retry( + total=RETRY_TOTAL, + backoff_factor=RETRY_BACKOFF_FACTOR, + status_forcelist=[500, 502, 503, 504], + ) + adapter = HTTPAdapter(max_retries=retries) + session.mount("http://", adapter) + session.mount("https://", adapter) + return session + + def make_request(self, method: str, url: str, **kwargs) -> requests.Response: + try: + response = self.session.request(method, url, headers=self.headers, timeout=REQUEST_TIMEOUT, **kwargs) + response.raise_for_status() + return response + except requests.Timeout: + logger.error(f"Request timed out after {REQUEST_TIMEOUT} seconds") + raise + except requests.RequestException as e: + logger.error(f"Request failed: {e}") + raise diff --git a/provisioner/README.md b/provisioner/README.md new file mode 100644 index 0000000..9bd9339 --- /dev/null +++ b/provisioner/README.md @@ -0,0 +1,177 @@ +# CloudLab Provisioner + +A tool for automatically provisioning and managing clusters on CloudLab. This provisioner helps maintain a pool of available clusters and handles cluster lifecycle management including claiming, extending, and automatic cleanup. + +## Features + +- Automatic cluster provisioning and management +- User registration and cluster claiming +- Automatic cluster extension for active users +- Automatic cleanup of inactive clusters +- Email notifications for cluster events +- CLI interface for easy interaction + +## Prerequisites + +1. Go to https://www.cloudlab.us/ +2. Login with your cloudlab account +3. On the top right corner, click on your username, and then click on "Download Credentials" +4. This will take you to a page with a button to download the credentials. Click on it. +5. This will download a file called `cloudlab.pem`. + +The `cloudlab.pem` contains the encrypted private key to your cloudlab account and ssl certificate. You need to decrypt it before using it. 
+
+### Install OpenSSL (if not already installed)
+
+For Ubuntu/Debian:
+```bash
+sudo apt install openssl
+```
+
+For macOS:
+```bash
+brew install openssl
+```
+
+### Decrypting the CloudLab Credentials
+
+```bash
+openssl rsa -in cloudlab.pem -out cloudlab_decrypted.pem
+```
+
+When prompted for a password, enter your CloudLab account password (the same one you use to log in to the CloudLab website).
+This creates a new file, `cloudlab_decrypted.pem`, containing your decrypted private key.
+The SSL certificate remains in the original `cloudlab.pem` file.
+
+### Environment Variables
+
+The provisioner needs its own set of SSH keys. Generate them with the following command:
+
+```bash
+ssh-keygen -t ed25519 -f provisioner_ssh_key
+```
+
+Set the following required environment variables in a `.env` file:
+
+```bash
+PROVISIONER_SSH_PRIVATE_KEY_PATH="/path/to/provisioner_ssh_key"
+PROVISIONER_SSH_PUBLIC_KEY_PATH="/path/to/provisioner_ssh_key.pub"
+
+CLOUDLAB_CERT_PATH="/path/to/cloudlab.pem"
+CLOUDLAB_KEY_PATH="/path/to/cloudlab_decrypted.pem"
+CLOUD_PROJECT_NAME="your-cloudlab-project-name"
+
+DEPLOY_KEY_PATH="/path/to/deploy-key"
+```
+
+Optional email notification settings:
+
+```bash
+SMTP_SERVER="smtp.gmail.com"
+SMTP_PORT="587"
+SMTP_USERNAME="your.email@gmail.com"
+SMTP_PASSWORD="your-app-password"
+```
+
+For Gmail, you'll need to create an app password. Follow this [guide](https://bestsoftware.medium.com/how-to-create-an-app-password-on-gmail-e00eff3af4e0) to create one.
+
+## CloudLab Provisioner
+
+### How It Works
+
+#### Cluster Management
+
+- Maintains 2 unclaimed clusters ready for use
+- Unclaimed clusters are deleted after 16 hours of inactivity
+- Each user can claim up to 2 clusters
+- Maximum of 8 total clusters (claimed + unclaimed)
+
+#### Cluster Lifecycle
+
+1. **Claiming**: Users can claim available clusters. If no clusters are ready to be claimed, the provisioner will create new ones.
+2. **Extension**: Claimed clusters are automatically re-extended every day so that they remain reserved for the next 7 days.
+3. **Cleanup**: Inactive clusters (>48 hours without SSH access) are automatically deleted.
+
+#### Daemon Operation
+
+- Runs at a set interval (default is 5 minutes)
+- Manages the cluster lifecycle
+- Sends email notifications for important events
+- Handles automatic extensions and cleanup
+
+### Running the Provisioner as a Daemon Service
+
+1. Run the setup script:
+```bash
+chmod +x setup_daemon.sh
+sudo ./setup_daemon.sh
+```
+2. To stop the daemon, run:
+```bash
+sudo systemctl stop provisioner.service
+```
+
+### Running the Provisioner as a Program
+
+To run the provisioner as a program, run the following command:
+
+```bash
+cd provisioner
+python3 daemon.py
+```
+
+### Configuring the Provisioner
+
+The variables in the `config/settings.py` file configure the provisioner; edit them to change its behavior. A sketch of the kinds of settings defined there is shown below.
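+For orientation, here is a hypothetical excerpt of `config/settings.py`. The attribute names appear elsewhere in this repository, but the values shown are purely illustrative (they simply mirror the behavior described in "How It Works"); treat the real file as the authoritative source of names and defaults.
+
+```python
+# Illustrative sketch only -- see config/settings.py for the actual defaults.
+class DefaultSettings:
+    # Pool sizing
+    MIN_AVAILABLE_CLUSTERS = 2              # unclaimed clusters kept ready for use
+    MAX_CLUSTERS_PER_USER = 2               # claimed clusters allowed per user
+    MAX_TOTAL_CLUSTERS = 8                  # claimed + unclaimed, system-wide
+
+    # Lifecycle timers
+    UNCLAIMED_CLUSTER_TIMEOUT_HOURS = 16    # unclaimed clusters are deleted after this
+    CLAIMED_CLUSTER_DEFAULT_DURATION_HOURS = 7 * 24   # rolling reservation window
+
+    # Run the test suite with test values when True
+    SET_TEST_VALUES = False
+```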
+
+### Using the CLI
+
+The provisioner provides a command-line interface for managing clusters:
+
+```bash
+python3 cli.py --help
+```
+
+#### Available Commands
+
+| Command | Description | Example |
+|---------|-------------|---------|
+| `register` | Register a new user | `python3 cli.py register --email user@example.com --ssh-key "$(cat ~/.ssh/id_rsa.pub)"` |
+| `claim` | Claim an available cluster | `python3 cli.py claim --email user@example.com` |
+| `list` | List clusters for a user | `python3 cli.py list --email user@example.com` |
+| `relinquish` | Release a claimed cluster | `python3 cli.py relinquish --email user@example.com --experiment exp-name` |
+| `status` | Check cluster status | `python3 cli.py status --experiment exp-name` |
+
+Note that `register` expects the SSH public key content itself (e.g. via `$(cat ...)`), not a file path.
+
+`claim` has two additional options:
+- `--deploy-sregym`: Deploys SREGym on the claimed cluster
+- `--eval-override`: Requests an evaluation override so the cluster is not deleted due to inactivity
+
+## Testing
+
+The `tests/provisioner/test_provisioner.py` file contains a test suite covering the core functionality of the provisioner. Set `SET_TEST_VALUES` to `True` in `config/settings.py` to run the tests with test values. The tests provision real CloudLab clusters, so CloudLab credentials are required. Running the first 8 tests takes approximately 35-40 minutes; the last one, `test_sregym_deploy`, takes approximately 10-12 minutes.
+
+### Running Tests
+
+To run all tests:
+```bash
+cd tests/provisioner
+python3 -m pytest test_provisioner.py
+```
+
+To run a specific test:
+```bash
+python3 -m pytest test_provisioner.py::test_name
+```
+
+### Test Suite Overview
+
+The test suite includes the following tests:
+
+1. test_auto_provisioning - Tests automatic cluster provisioning when the pool falls below MIN_AVAILABLE_CLUSTERS
+2. test_user_claim_and_relinquish - Tests the user cluster claim and release workflow
+3. test_max_clusters_per_user - Ensures users can't exceed their cluster limit
+4. test_unclaimed_cluster_timeout - Tests automatic cleanup of unused clusters
+5. test_max_total_clusters_limit - Tests system-wide cluster limit enforcement
+6. test_claimed_cluster_inactivity_timeout - Tests cleanup of inactive claimed clusters
+7. test_eval_override_for_inactivity - Tests that clusters claimed with the evaluation override are protected from inactivity cleanup
+8. test_claimed_cluster_extension - Tests automatic claimed cluster reservation extension
+9. 
test_sregym_deploy - Tests SREGym deployment on a claimed cluster \ No newline at end of file diff --git a/provisioner/__init__.py b/provisioner/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/provisioner/cli.py b/provisioner/cli.py new file mode 100644 index 0000000..d3ee472 --- /dev/null +++ b/provisioner/cli.py @@ -0,0 +1,680 @@ +import datetime +import logging +import os +import re +import time +from pathlib import Path + +import click + +from provisioner.cloudlab_provisioner import CloudlabProvisioner +from provisioner.config.settings import DefaultSettings +from provisioner.state_manager import CLUSTER_STATUS, SREGYM_STATUS, StateManager +from provisioner.utils.ssh import SSHManager, SSHUtilError +from scripts.geni_lib.cluster_setup import setup_cloudlab_cluster_with_sregym + +logger = logging.getLogger(__name__) + +_state_manager_instance: StateManager = None +_cloudlab_provisioner_instance: CloudlabProvisioner = None +EMAIL_REGEX = r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$" + + +def is_valid_email(email: str) -> bool: + return re.match(EMAIL_REGEX, email) is not None + + +def get_state_manager() -> StateManager: + global _state_manager_instance + if _state_manager_instance is None: + _state_manager_instance = StateManager(db_path=DefaultSettings.DATABASE_PATH) + return _state_manager_instance + + +def get_cloudlab_provisioner() -> CloudlabProvisioner: + global _cloudlab_provisioner_instance + if _cloudlab_provisioner_instance is None: + _cloudlab_provisioner_instance = CloudlabProvisioner() + return _cloudlab_provisioner_instance + + +def _ensure_ssh_prerequisites(): + """Checks if necessary SSH configuration for the provisioner is present.""" + if not DefaultSettings.PROVISIONER_DEFAULT_SSH_USERNAME: + click.echo( + click.style("ERROR: PROVISIONER_DEFAULT_SSH_USERNAME is not correctly set in settings.py.", fg="red") + ) + return False + key_path = Path(os.path.expanduser(DefaultSettings.PROVISIONER_SSH_PRIVATE_KEY_PATH)) + if not key_path.exists(): + click.echo( + click.style( + f"ERROR: Provisioner's SSH private key not found at '{key_path}'. This is required for node operations.", + fg="red", + ) + ) + return False + return True + + +def _get_ssh_manager(hostname: str) -> SSHManager: + """Creates an SSHManager instance after ensuring prerequisites.""" + if not _ensure_ssh_prerequisites(): + raise click.Abort() # Abort the current command + return SSHManager( + hostname=hostname, + username=DefaultSettings.PROVISIONER_DEFAULT_SSH_USERNAME, + private_key_path=DefaultSettings.PROVISIONER_SSH_PRIVATE_KEY_PATH, + ) + + +def _format_ssh_command(login_info_entry: list) -> str: + """Formats an SSH command string from a login_info entry.""" + ssh_user = DefaultSettings.PROVISIONER_DEFAULT_SSH_USERNAME + hostname = login_info_entry[2] + port = login_info_entry[3] + return f"ssh {ssh_user}@{hostname} -p {port}" + + +def _add_user_ssh_key_to_node(ssh_mgr: SSHManager, user_public_key: str, user_id_for_log: str) -> bool: + """ + Safely adds a user's SSH public key to the authorized_keys file on a remote node. + Returns True on success, False on failure. 
+ """ + hostname_for_log = ssh_mgr.hostname + try: + # Escape the public key for shell safety + escaped_key = user_public_key.replace('"', '\\"') + + # Single command to create .ssh directory, add key, and set permissions + cmd = ( + "mkdir -p ~/.ssh && chmod 700 ~/.ssh && " + f'echo "{escaped_key}" >> ~/.ssh/authorized_keys && ' + "chmod 600 ~/.ssh/authorized_keys" + ) + _, stderr, exit_code = ssh_mgr.execute_ssh_command(cmd) + + if exit_code != 0: + click.echo(click.style(f"ERROR: Failed to setup SSH key on {hostname_for_log}: {stderr}", fg="red")) + logger.error(f"SSH key setup failed on {hostname_for_log} for user {user_id_for_log}: {stderr}") + return False + + logger.info(f"User SSH key for {user_id_for_log} added to {hostname_for_log}") + return True + + except SSHUtilError as e: + click.echo(click.style(f"ERROR: SSH operation failed on {hostname_for_log} while adding key: {e}", fg="red")) + logger.error(f"SSHUtilError on {hostname_for_log} for user {user_id_for_log}: {e}") + return False + except Exception as e: + click.echo( + click.style(f"ERROR: Unexpected error during SSH key injection on {hostname_for_log}: {e}", fg="red") + ) + logger.error(f"Unexpected error injecting key on {hostname_for_log} for {user_id_for_log}: {e}", exc_info=True) + return False + + +def _remove_user_ssh_key_from_node(ssh_mgr: SSHManager, user_public_key: str, user_id_for_log: str) -> bool: + hostname_for_log = ssh_mgr.hostname + operation_id = datetime.datetime.now().strftime("%Y%m%d%H%M%S%f") + logger.debug( + f"Minimally attempting to remove SSH key for user {user_id_for_log} from {hostname_for_log} (OpID: {operation_id})." + ) + + user_public_key_cleaned = user_public_key.strip() + if not user_public_key_cleaned: + logger.error(f"Empty public key provided for {user_id_for_log}. Cannot remove.") + return False + + try: + # Escape the public key for shell safety + escaped_key = user_public_key_cleaned.replace('"', '\\"') + + # Single command to remove the key and update permissions + cmd = ( + f"if [ -f ~/.ssh/authorized_keys ]; then " + f'grep -v -F -x "{escaped_key}" ~/.ssh/authorized_keys > ~/.ssh/authorized_keys.tmp && ' + f"mv ~/.ssh/authorized_keys.tmp ~/.ssh/authorized_keys && " + f"chmod 600 ~/.ssh/authorized_keys; " + f"else " + f'echo "Authorized_keys file not found, key considered absent."; ' + f"fi" + ) + + stdout, stderr, exit_code = ssh_mgr.execute_ssh_command(cmd) + + if exit_code == 0: + if "Authorized_keys file not found" in stdout: + logger.info( + f"Authorized_keys file not found on {hostname_for_log} for user {user_id_for_log}. Key considered absent." + ) + else: + logger.info( + f"Successfully processed authorized_keys for key removal for {user_id_for_log} on {hostname_for_log}." + ) + return True # Success or file not found (key absent) + else: + logger.error( + f"Failed to execute key removal command for {user_id_for_log} on {hostname_for_log}. " + f"Exit code: {exit_code}, Stdout: '{stdout}', Stderr: '{stderr}'." + ) + return False + + except SSHUtilError as e: + logger.error(f"SSHUtilError during minimal key removal for {user_id_for_log} on {hostname_for_log}: {e}") + return False + except Exception as e: + logger.error( + f"Unexpected error during minimal key removal for {user_id_for_log} on {hostname_for_log}: {e}", + exc_info=True, + ) + return False + + +def _setup_sregym(cluster_info: dict) -> bool: + """ + Setup SREGym on a newly provisioned cluster. + Returns True on success, False on failure. 
+ """ + try: + slice_name = cluster_info["slice_name"] + login_info = cluster_info["login_info"] + + click.echo(click.style(f"Setting up SREGym on cluster {slice_name}...", fg="yellow")) + hosts = [info[2] for info in login_info] + + cfg = { + "cloudlab": { + "ssh_user": DefaultSettings.PROVISIONER_DEFAULT_SSH_USERNAME, + "ssh_key": DefaultSettings.PROVISIONER_SSH_PRIVATE_KEY_PATH, + "nodes": hosts, + }, + "pod_network_cidr": DefaultSettings.DEFAULT_POD_NETWORK_CIDR, + "deploy_sregym": True, + "deploy_key": DefaultSettings.DEPLOY_KEY_PATH, + } + + setup_cloudlab_cluster_with_sregym(cfg) + + click.echo(click.style(f"SREGym setup completed successfully for {slice_name}.", fg="green")) + return True + except Exception as e: + click.echo(click.style(f"Error setting up SREGym: {e}", fg="red")) + logger.error(f"Error setting up SREGym for {slice_name}: {e}", exc_info=True) + return False + + +# --- Click Command Group --- +@click.group() +@click.option("--verbose", "-v", is_flag=True, help="Enable verbose output for some operations.") +@click.pass_context +def cli(ctx, verbose): + """Cloudlab Cluster Provisioner CLI.""" + ctx.ensure_object(dict) + ctx.obj["VERBOSE"] = verbose + if verbose: + click.echo("Verbose mode enabled for CLI.") + + +# --- User Commands --- +@cli.command() +@click.option("--email", required=True, help="Your unique email address for registration.") +@click.option("--ssh-key", required=True, help="Your SSH public key.") +def register(email, ssh_key): + """Registers a new user with their email and SSH public key.""" + sm = get_state_manager() + if not is_valid_email(email): + click.echo(click.style("ERROR: Invalid email address format.", fg="red")) + return + + try: + # Basic validation of key format + if not ( + ssh_key.startswith("ssh-rsa ") + or ssh_key.startswith("ssh-ed25519 ") + or ssh_key.startswith("ecdsa-sha2-nistp") + ): # Note the space + click.echo( + click.style( + "ERROR: Invalid or incomplete SSH public key format. Ensure it includes the key type (e.g., 'ssh-rsa AAA...').", + fg="red", + ) + ) + return + except Exception as e: + click.echo(click.style(f"ERROR: Could not read SSH key file: {e}", fg="red")) + return + + if sm.add_user(email, ssh_key): + click.echo(click.style(f"User with email '{email}' registered successfully.", fg="green")) + else: + click.echo(click.style(f"User with email '{email}' might already be registered", fg="yellow")) + + +@cli.command() +@click.option("--email", required=True, help="Your registered email address.") +@click.option("--eval-override", is_flag=True, help="Request evaluation override for longer inactivity timeout.") +@click.option("--deploy-sregym", is_flag=True, help="Deploy SREGym on the cluster.") +@click.pass_context +def claim(ctx, email, eval_override, deploy_sregym): + """Claims an available cluster or requests a new one.""" + sm = get_state_manager() + cp = get_cloudlab_provisioner() + + if not is_valid_email(email): + click.echo(click.style("ERROR: Invalid email address format.", fg="red")) + return + + user = sm.get_user(email) + if not user: + click.echo(click.style(f"ERROR: User with email '{email}' not registered. Please register first.", fg="red")) + return + + user_claimed_count = sm.count_user_claimed_clusters(email) + if user_claimed_count >= DefaultSettings.MAX_CLUSTERS_PER_USER: + click.echo( + click.style( + f"ERROR: User '{email}' has already claimed the maximum of {DefaultSettings.MAX_CLUSTERS_PER_USER} clusters.", + fg="red", + ) + ) + return + + # 1. 
Try to get an existing unclaimed_ready cluster + unclaimed_clusters = sm.get_clusters_by_status(CLUSTER_STATUS.STATUS_UNCLAIMED_READY) + if unclaimed_clusters: + cluster_to_claim = unclaimed_clusters[0] # Simple: take the first one + slice_name = cluster_to_claim["slice_name"] + hostname = cluster_to_claim["control_node_hostname"] + + click.echo(f"Found available cluster: {slice_name}. Attempting to claim for '{email}'...") + sm.update_cluster_record(slice_name, status=CLUSTER_STATUS.STATUS_USER_PROVISIONING, claimed_by_user_id=email) + + if not hostname: + click.echo( + click.style( + f"ERROR: Cluster {slice_name} has no control_node_hostname. Cannot proceed with claim.", fg="red" + ) + ) + logger.error(f"Claim aborted for {slice_name}: missing control_node_hostname in DB.") + return + + try: + while not cp.are_nodes_ready(slice_name, cluster_to_claim["aggregate_name"]): + click.echo(click.style(f"Waiting for nodes to be ready on {slice_name}...", fg="yellow")) + time.sleep(10) + except Exception as e: + click.echo(click.style(f"ERROR: Failed to wait for nodes to be ready on {slice_name}: {e}", fg="red")) + logger.error(f"Failed to wait for nodes to be ready on {slice_name}: {e}") + sm.update_cluster_record(slice_name, status=CLUSTER_STATUS.STATUS_ERROR, last_error_message=str(e)) + return + + try: + # add ssh public key to all nodes in the cluster + for node_info in cluster_to_claim["login_info"]: + node_hostname = node_info[2] + ssh_mgr = _get_ssh_manager(node_hostname) + logger.info(f"Adding user SSH key to node {node_hostname} for user {email}") + if not _add_user_ssh_key_to_node(ssh_mgr, user["ssh_public_key"], email): + return + user_ssh_key_installed_flag = True + + except (SSHUtilError, click.Abort) as e_ssh: # Catch Abort from _get_ssh_manager + click.echo(click.style(f"ERROR: SSH operation failed for new cluster {slice_name}: {e_ssh}", fg="red")) + sm.update_cluster_record( + slice_name, + status=CLUSTER_STATUS.STATUS_ERROR, + last_error_message=f"SSH key injection failed: {e_ssh}", + ) + if cluster_to_claim.get("aggregate_name"): # Attempt cleanup + # Mark the experiment for termination + logger.info(f"Marking experiment {slice_name} for termination") + sm.update_cluster_record(slice_name, status=CLUSTER_STATUS.STATUS_TERMINATING) + return + + # Extend Cloudlab duration + now = datetime.datetime.now() + new_duration_hours = DefaultSettings.CLAIMED_CLUSTER_DEFAULT_DURATION_HOURS + new_cloudlab_expires_at = cluster_to_claim["cloudlab_expires_at"] + try: + if cp.renew_experiment(slice_name, new_duration_hours, cluster_to_claim["aggregate_name"]): + new_cloudlab_expires_at = now + datetime.timedelta(hours=new_duration_hours) + logger.info(f"Extended Cloudlab experiment {slice_name} to {new_cloudlab_expires_at}") + else: + click.echo( + click.style( + f"WARNING: Failed to extend Cloudlab duration for {slice_name}. It may expire sooner. Current expiry: {new_cloudlab_expires_at}", + fg="yellow", + ) + ) + except Exception as e: + click.echo( + click.style( + f"WARNING: Error extending Cloudlab duration for {slice_name}: {e}. Current expiry: {new_cloudlab_expires_at}", + fg="yellow", + ) + ) + + if deploy_sregym: + click.echo("Setting up SREGym for your cluster. 
This may take several minutes...") + experiment_info = { + "slice_name": slice_name, + "login_info": cluster_to_claim["login_info"], + } + setup_success = _setup_sregym(experiment_info) + if setup_success: + sm.update_cluster_record( + slice_name, + sregym_setup_status=SREGYM_STATUS.SREGYM_SUCCESS, + ) + click.echo(click.style("SREGym successfully set up on your cluster!", fg="green")) + else: + sm.update_cluster_record( + slice_name, + sregym_setup_status=SREGYM_STATUS.SREGYM_FAILED, + last_error_message="SREGym setup failed", + ) + click.echo( + click.style( + "SREGym setup failed. You may still use the cluster for basic operations.", fg="yellow" + ) + ) + + # Update DB + sm.update_cluster_record( + slice_name, + status=CLUSTER_STATUS.STATUS_CLAIMED, + claimed_by_user_id=email, + user_ssh_key_installed=user_ssh_key_installed_flag, + cloudlab_expires_at=new_cloudlab_expires_at, + evaluation_override=eval_override, + claimed_at=now, + ) + click.echo(click.style(f"Cluster '{slice_name}' successfully claimed by '{email}'.", fg="green")) + click.echo("SSH Access (Control Node):") + if cluster_to_claim.get("login_info"): + for node_info in cluster_to_claim["login_info"]: + click.echo(f" {_format_ssh_command(node_info)}") + elif hostname: # Fallback if login_info is missing/malformed but hostname exists + click.echo(f" ssh {DefaultSettings.PROVISIONER_DEFAULT_SSH_USERNAME}@{hostname}") + else: + click.echo(click.style(" Could not determine SSH access details.", fg="yellow")) + + else: # No UNCLAIMED_READY clusters, try to provision a new one for the user + click.echo("No readily available clusters. Attempting to provision a new one for you...") + + current_total_managed = sm.count_total_managed_clusters() + if current_total_managed >= DefaultSettings.MAX_TOTAL_CLUSTERS: + click.echo( + click.style( + f"ERROR: Maximum total clusters ({DefaultSettings.MAX_TOTAL_CLUSTERS}) reached. 
Cannot provision for user '{email}' at this time.", + fg="red", + ) + ) + return + + slice_name = cp.generate_slice_name() + click.echo(f"Requesting new cluster: {slice_name} (this may take several minutes)...") + + # Create DB record first, marking it as user-provisioning and pre-assigning to user + sm.create_cluster_record( + slice_name=slice_name, + aggregate_name="", + os_type=DefaultSettings.DEFAULT_OS_TYPE, + node_count=DefaultSettings.DEFAULT_NODE_COUNT, + status=CLUSTER_STATUS.STATUS_USER_PROVISIONING, + claimed_by_user_id=email, # Pre-claim + evaluation_override=eval_override, + ) + + experiment_info = None + try: + user_provision_duration = DefaultSettings.CLAIMED_CLUSTER_DEFAULT_DURATION_HOURS + experiment_info = cp.create_experiment( + slice_name=slice_name, + hardware_type=DefaultSettings.DEFAULT_HARDWARE_TYPE, + os_type=DefaultSettings.DEFAULT_OS_TYPE, + node_count=DefaultSettings.DEFAULT_NODE_COUNT, + duration=user_provision_duration, + ) + + if not (experiment_info and experiment_info.get("login_info")): + raise Exception("Cloudlab experiment creation failed or returned no login_info.") + + control_node_info = next((n for n in experiment_info["login_info"] if n[0] == "control"), None) + if not control_node_info: + raise ValueError("Control node info not found in login_info after user provisioning.") + hostname = control_node_info[2] + now = datetime.datetime.now() + expires_at = now + datetime.timedelta(hours=experiment_info["duration"]) + + try: + while not cp.are_nodes_ready(slice_name, experiment_info["aggregate_name"]): + click.echo(click.style(f"Waiting for nodes to be ready on {slice_name}...", fg="yellow")) + time.sleep(10) + logger.info(f"Nodes are ready for {slice_name}.") + except Exception as e: + click.echo(click.style(f"ERROR: Failed to wait for nodes to be ready on {slice_name}: {e}", fg="red")) + logger.error(f"Failed to wait for nodes to be ready on {slice_name}: {e}") + sm.update_cluster_record(slice_name, status=CLUSTER_STATUS.STATUS_ERROR, last_error_message=str(e)) + return + + try: + # add ssh public key to all nodes in the cluster + for node_info in experiment_info["login_info"]: + node_hostname = node_info[2] + ssh_mgr = _get_ssh_manager(node_hostname) + logger.info(f"Adding user SSH key to node {node_hostname} for user {email}") + if not _add_user_ssh_key_to_node(ssh_mgr, user["ssh_public_key"], email): + return + user_ssh_key_installed_flag = True + + except (SSHUtilError, click.Abort) as e_ssh: # Catch Abort from _get_ssh_manager + click.echo(click.style(f"ERROR: SSH operation failed for new cluster {slice_name}: {e_ssh}", fg="red")) + sm.update_cluster_record( + slice_name, + status=CLUSTER_STATUS.STATUS_ERROR, + last_error_message=f"SSH key injection failed: {e_ssh}", + ) + if experiment_info.get("aggregate_name"): # Attempt cleanup + # Mark the experiment for termination + logger.info(f"Marking experiment {slice_name} for termination") + sm.update_cluster_record(slice_name, status=CLUSTER_STATUS.STATUS_TERMINATING) + return + + if deploy_sregym: + click.echo("Setting up SREGym for your cluster. 
This may take several minutes...") + setup_success = _setup_sregym(experiment_info) + if setup_success: + sm.update_cluster_record( + slice_name, + sregym_setup_status=SREGYM_STATUS.SREGYM_SUCCESS, + ) + click.echo(click.style("SREGym successfully set up on your cluster!", fg="green")) + else: + sm.update_cluster_record( + slice_name, + sregym_setup_status=SREGYM_STATUS.SREGYM_FAILED, + last_error_message="SREGym setup failed", + ) + click.echo( + click.style( + "SREGym setup failed. You may still use the cluster for basic operations.", fg="yellow" + ) + ) + + sm.update_cluster_record( + slice_name, + status=CLUSTER_STATUS.STATUS_CLAIMED, + aggregate_name=experiment_info["aggregate_name"], + hardware_type=experiment_info["hardware_type"], + control_node_hostname=hostname, + login_info=experiment_info["login_info"], + user_ssh_key_installed=user_ssh_key_installed_flag, + cloudlab_expires_at=expires_at, + claimed_at=now, + ) + click.echo( + click.style( + f"New cluster '{slice_name}' successfully provisioned and claimed by '{email}'.", fg="green" + ) + ) + click.echo("SSH Access:") + if experiment_info.get("login_info"): + for node_info in experiment_info["login_info"]: + click.echo(f" {_format_ssh_command(node_info)}") + elif hostname: + click.echo(f" ssh {DefaultSettings.PROVISIONER_DEFAULT_SSH_USERNAME}@{hostname}") + + except Exception as e: + click.echo( + click.style( + f"ERROR: An unexpected error occurred during user-triggered provisioning for {slice_name}: {e}", + fg="red", + ) + ) + logger.error(f"User provision error for {slice_name}: {e}", exc_info=True) + # Ensure status is ERROR if it was created in DB + if sm.get_cluster_by_slice_name(slice_name): + sm.update_cluster_record(slice_name, status=CLUSTER_STATUS.STATUS_ERROR, last_error_message=str(e)) + # Attempt to delete from Cloudlab if experiment_info was partially obtained + if experiment_info and experiment_info.get("aggregate_name"): + logger.info( + f"Attempting to cleanup partially provisioned Cloudlab experiment {slice_name} (user-triggered)" + ) + # Mark the experiment for termination + sm.update_cluster_record(slice_name, status=CLUSTER_STATUS.STATUS_TERMINATING) + + +@cli.command(name="list") +@click.option("--email", help="List clusters claimed by this email. If not provided, lists unclaimed ready clusters.") +@click.pass_context +def list_clusters(ctx, email): + """Lists clusters. 
Shows unclaimed ready, or user's claimed clusters.""" + sm = get_state_manager() + verbose = ctx.obj.get("VERBOSE", False) + + if email: + if not is_valid_email(email): + click.echo(click.style("ERROR: Invalid email address format.", fg="red")) + return + user = sm.get_user(email) + if not user: + click.echo(click.style(f"ERROR: User with email '{email}' not registered.", fg="red")) + return + clusters = sm.get_claimed_clusters_by_user(email) + if not clusters: + click.echo(f"User '{email}' has no claimed clusters.") + return + click.echo(f"Clusters claimed by '{email}':") + else: + clusters = sm.get_clusters_by_status(CLUSTER_STATUS.STATUS_UNCLAIMED_READY) + if not clusters: + click.echo("No unclaimed ready clusters available.") + return + click.echo("Unclaimed Ready Clusters:") + + for cluster in clusters: + click.echo(f" Slice: {cluster['slice_name']} (Status: {cluster['status']})") + if verbose or email: # Show more details if verbose or listing user's clusters + if cluster.get("control_node_hostname"): + click.echo(f" Control Node: {cluster['control_node_hostname']}") + if cluster.get("cloudlab_expires_at"): + expires_at_str = ( + cluster["cloudlab_expires_at"].strftime("%Y-%m-%d %H:%M:%S %Z") + if isinstance(cluster["cloudlab_expires_at"], datetime.datetime) + and cluster["cloudlab_expires_at"].tzinfo + else str(cluster["cloudlab_expires_at"]) + ) + click.echo(f" Cloudlab Expires: {expires_at_str}") + if cluster.get("login_info") and isinstance(cluster.get("login_info"), list): + for node_info in cluster["login_info"]: + if node_info[0] == "control": + click.echo(f" SSH: {_format_ssh_command(node_info)}") + if verbose: # Even more details for verbose mode + click.echo(f" Aggregate: {cluster.get('aggregate_name')}") + click.echo(f" Hardware: {cluster.get('hardware_type')}") + click.echo(f" Claimed by: {cluster.get('claimed_by_user_id', 'N/A')}") + click.echo(f" SREGym: {cluster.get('sregym_setup_status', 'N/A')}") + + +@cli.command() +@click.option("--email", required=True, help="Your registered email address.") +@click.option("--experiment", required=True, help="The name of the experiment to relinquish.") +def relinquish(email, experiment): + """Relinquishes a claimed cluster, marking it for termination.""" + try: + sm = get_state_manager() + if not is_valid_email(email): + click.echo(click.style("ERROR: Invalid email address format.", fg="red")) + return + + user = sm.get_user(email) + if not user: + click.echo(click.style(f"ERROR: User with email '{email}' not registered.", fg="red")) + return + + cluster = sm.get_cluster_by_slice_name(experiment) + if not cluster: + click.echo(click.style(f"ERROR: Cluster '{experiment}' not found.", fg="red")) + return + + if cluster["claimed_by_user_id"] != email or cluster["status"] != CLUSTER_STATUS.STATUS_CLAIMED: + click.echo( + click.style(f"ERROR: Cluster '{experiment}' is not currently claimed by user '{email}'.", fg="red") + ) + return + + sm.update_cluster_record( + experiment, + status=CLUSTER_STATUS.STATUS_TERMINATING, + claimed_by_user_id=None, # Disassociate user + user_ssh_key_installed=False, + ) + click.echo( + click.style(f"Cluster '{experiment}' relinquished by '{email}' and marked for termination.", fg="green") + ) + logger.info(f"User {email} relinquished cluster {experiment}. 
Marked for termination.") + except Exception as e: + click.echo(click.style(f"ERROR: Failed to update cluster '{experiment}' status to terminating: {e}", fg="red")) + logger.error(f"Failed to update cluster '{experiment}' status to terminating: {e}") + + +@cli.command() +@click.option("--experiment", required=True, help="The name of the experiment to get status for.") +def status(experiment): + """Shows detailed status of a specific cluster.""" + sm = get_state_manager() + cluster = sm.get_cluster_by_slice_name(experiment) + if not cluster: + click.echo(click.style(f"ERROR: Cluster '{experiment}' not found.", fg="red")) + return + + click.echo(f"Status for Experiment: {click.style(cluster['slice_name'], bold=True)}") + for key, value in sorted(cluster.items()): # Sort for consistent output + if key == "id": + continue # Skip internal DB id + + display_key = key.replace("_", " ").title() + display_value = value + + if isinstance(value, datetime.datetime): + display_value = value.strftime("%Y-%m-%d %H:%M:%S %Z") if value.tzinfo else value.isoformat() + elif key == "login_info" and isinstance(value, list): + click.echo(f" {display_key}:") + for node_entry in value: + # node_entry is [client_id, user_on_node, hostname, port] + if node_entry[0] == "control": + click.echo(f" - Control Node SSH: {_format_ssh_command(node_entry)}") + else: + click.echo(f" - {node_entry[0]}: {node_entry[2]}:{node_entry[3]}") # client_id: hostname:port + continue # Skip default print for login_info + elif value is None: + display_value = click.style("N/A", dim=True) + + click.echo(f" {display_key + ':':<30} {display_value}") + + +# --- Main Execution --- +if __name__ == "__main__": + cli(obj={}) diff --git a/provisioner/cloudlab_provisioner.py b/provisioner/cloudlab_provisioner.py new file mode 100644 index 0000000..aeaacb6 --- /dev/null +++ b/provisioner/cloudlab_provisioner.py @@ -0,0 +1,333 @@ +import datetime +import json +import random +import warnings + +import geni.portal as portal +import geni.util + +from provisioner.config.settings import ( + AGGREGATES_MAP, + CLOUD_LAB_CONTEXT_JSON, + DELETE_EXPERIMENT_ERRORS, + PRIORITY_HARDWARE_TYPES, + DefaultSettings, +) +from provisioner.utils.logger import logger +from provisioner.utils.parser import collect_and_parse_hardware_info, parse_sliver_info + +warnings.filterwarnings("ignore", category=UserWarning) + + +class CloudlabProvisioner: + def __init__(self): + + # create context.json + with open("context.json", "w") as f: + json.dump(CLOUD_LAB_CONTEXT_JSON, f, indent=4) + + context_path = "context.json" + self.context = geni.util.loadContext(path=context_path) + self.project = self.context.cf.project + self.framework = self.context.cf.name + self.cert_path = self.context.cf.cert + self.key_path = self.context.cf.key + self.user_name = self.context.uname + self.user_urn = list(self.context._users)[0].urn + self.user_pubkeypath = list(self.context._users)[0]._keys[0] + + def get_aggregate(self, aggregate_name: str): + return AGGREGATES_MAP[aggregate_name.lower()] + + def get_aggregate_version(self, aggregate_name: str): + aggregate = self.get_aggregate(aggregate_name) + return aggregate.getversion(context=self.context) + + def get_all_hardware_info(self, hardware_type: str): + all_hardware_list = collect_and_parse_hardware_info() + hardware_list = [] + for hardware in all_hardware_list: + if hardware["hardware_name"] == hardware_type: + hardware_list.append(hardware) + return hardware_list + + def print_all_hardware_info(self): + hardware_list = 
collect_and_parse_hardware_info() + print(f"{'Hardware Name':<20} | {'Cluster Name':<30} | {'Total':<7} | {'Free':<7}") + print("-" * 100) + for hardware in hardware_list: + print( + f"{hardware['hardware_name']:<20} | {hardware['cluster_name']:<30} | {hardware['total']:<7} | {hardware['free']:<7}" + ) + + def get_hardware_available_aggregate_name(self, hardware_type: str, node_count: int): + hardware_list = self.get_all_hardware_info(hardware_type) + aggregate_name = None + + for hardware in hardware_list: + if hardware["hardware_name"] == hardware_type and hardware["free"] >= node_count: + aggregate_name = hardware["cluster_name"].lower() + break + + if not aggregate_name: + logger.error("Error: Requested hardware is not available") + return None + + return aggregate_name + + def generate_slice_name(self): + return f"test-{random.randint(100000, 999999)}" + + def create_slice(self, slice_name: str, duration: float, description: str = "Cloudlab Experiment"): + try: + expiration = datetime.datetime.now() + datetime.timedelta(hours=duration) + res = self.context.cf.createSlice(self.context, slice_name, exp=expiration, desc=description) + return res + except Exception as e: + logger.error(f"Error: {e}") + return None + + def create_sliver(self, slice_name: str, rspec_file: str, aggregate_name: str): + try: + aggregate = self.get_aggregate(aggregate_name) + igm = aggregate.createsliver(self.context, slice_name, rspec_file) + geni.util.printlogininfo(manifest=igm) + + login_info = geni.util._corelogininfo(igm) + return login_info + except Exception as e: + logger.error(f"Error: {e}") + return None + + def create_rspec( + self, + hardware_type: str = DefaultSettings.DEFAULT_HARDWARE_TYPE, + os_type: str = DefaultSettings.DEFAULT_OS_TYPE, + node_count: int = DefaultSettings.DEFAULT_NODE_COUNT, + ): + os_url = f"urn:publicid:IDN+emulab.net+image+emulab-ops//{os_type}" + + # geni/portal.py keeps state of previous rspec request so we need to reset it otherwise it will throw MultipleRSpecError + portal.context._request = None + rspec = portal.context.makeRequestRSpec() + + nodes = [] + nodes.append(rspec.RawPC("control")) + for i in range(1, node_count): + nodes.append(rspec.RawPC(f"compute{i}")) + + for node in nodes: + node.hardware_type = hardware_type + node.disk_image = os_url + + link = rspec.Link(members=nodes) + + return rspec + + def create_experiment( + self, + slice_name: str = None, + duration: float = DefaultSettings.UNCLAIMED_CLUSTER_TIMEOUT_HOURS, + description: str = DefaultSettings.DEFAULT_DESCRIPTION, + hardware_type: str = DefaultSettings.DEFAULT_HARDWARE_TYPE, + os_type: str = DefaultSettings.DEFAULT_OS_TYPE, + node_count: int = DefaultSettings.DEFAULT_NODE_COUNT, + save_info: bool = True, + ): + logger.info( + f"Creating experiment with duration: {duration}, description: {description}, hardware_type: {hardware_type}, os_type: {os_type}, node_count: {node_count}" + ) + if not slice_name: + slice_name = self.generate_slice_name() + + for i in range(10): + logger.info(f"Creating slice {slice_name}: attempt {i+1}") + slice_info = self.create_slice(slice_name, duration, description) + if slice_info: + logger.info(f"Slice {slice_name} created successfully") + break + + if not slice_info: + logger.error("Error: Failed to create slice") + return None + + logger.info(f"Slice Info: {slice_info}") + + # Move the hardware type to the first position in the priority list if it is not already there + if hardware_type not in PRIORITY_HARDWARE_TYPES: + PRIORITY_HARDWARE_TYPES.insert(0, 
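`get_hardware_available_aggregate_name` is a first-fit scan: the first aggregate reporting enough free nodes of the requested type wins, and its name is lower-cased so it can be looked up in `AGGREGATES_MAP`. A self-contained sketch of that selection over the dict shape produced by `collect_and_parse_hardware_info` (the counts in the sample rows are invented):

```python
def pick_aggregate(hardware_list, hardware_type, node_count):
    # First aggregate with enough free nodes of the requested type, else None.
    for hw in hardware_list:
        if hw["hardware_name"] == hardware_type and hw["free"] >= node_count:
            return hw["cluster_name"].lower()
    return None

sample = [
    {"hardware_name": "c220g5", "cluster_name": "Cloudlab Wisconsin", "total": 224, "free": 2},
    {"hardware_name": "c220g1", "cluster_name": "Cloudlab Wisconsin", "total": 90, "free": 41},
]
print(pick_aggregate(sample, "c220g5", 3))  # None: only 2 free nodes
print(pick_aggregate(sample, "c220g1", 3))  # "cloudlab wisconsin"
```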
hardware_type) + else: + PRIORITY_HARDWARE_TYPES.remove(hardware_type) + PRIORITY_HARDWARE_TYPES.insert(0, hardware_type) + + for i, hardware_type in enumerate(PRIORITY_HARDWARE_TYPES): + logger.info(f"Getting hardware available aggregate name for {hardware_type}") + aggregate_name = self.get_hardware_available_aggregate_name(hardware_type, node_count) + + if not aggregate_name: + logger.error(f"Error: No hardware available for {hardware_type}") + continue + + logger.info(f"Found hardware available aggregate name: {aggregate_name}") + + logger.info(f"Creating rspec file for {slice_name} in {aggregate_name}") + rspec_file = self.create_rspec(hardware_type, os_type, node_count) + logger.info(f"Created rspec file for {slice_name} in {aggregate_name}") + + logger.info(f"Creating sliver for {slice_name} in {aggregate_name}") + login_info = self.create_sliver(slice_name, rspec_file, aggregate_name) + logger.info(f"Created sliver for {slice_name} in {aggregate_name}") + + if login_info: + logger.info(f"Created sliver for {hardware_type} in {aggregate_name}") + break + + if not login_info: + logger.error("Error: Requested hardware is not available") + return None + + experiment_info = { + "slice_name": slice_name, + "aggregate_name": aggregate_name, + "duration": duration, + "description": description, + "hardware_type": hardware_type, + "os_type": os_type, + "node_count": node_count, + "created_at": datetime.datetime.now().isoformat(), + "login_info": login_info, + } + + if save_info: + with open(f"{slice_name}.experiment.info.json", "w") as f: + json.dump(experiment_info, f, indent=4) + + logger.info( + f"Experiment Successfully created: {slice_name}, duration: {duration}, description: {description}, hardware_type: {hardware_type}, os_type: {os_type}, node_count: {node_count}" + ) + + return experiment_info + + def renew_experiment(self, slice_name: str, duration: float, aggregate_name: str): + try: + logger.info(f"Renewing experiment {slice_name} for {duration} hours") + + # Renew the slice (add 1 hour buffer to ensure slice outlives sliver) + slice_renewal_success = self.renew_slice(slice_name, duration + 1) + if not slice_renewal_success: + logger.error(f"Failed to renew slice {slice_name}") + return False + + logger.info(f"Successfully renewed slice {slice_name} for {duration} hours") + + # Renew the sliver + sliver_renewal_success = self.renew_sliver(slice_name, aggregate_name, duration) + if not sliver_renewal_success: + logger.error(f"Failed to renew sliver for slice {slice_name}") + return False + + logger.info(f"Successfully renewed experiment {slice_name} for {duration} hours") + return True + except Exception as e: + logger.error(f"Error renewing experiment: {e}") + return False + + def delete_experiment(self, slice_name: str, aggregate_name: str): + try: + logger.info(f"Deleting experiment {slice_name} in {aggregate_name}") + aggregate = self.get_aggregate(aggregate_name) + aggregate.deletesliver(self.context, slice_name) + logger.info(f"Successfully deleted experiment {slice_name} in {aggregate_name}") + return True + except Exception as e: + logger.error(f"Error: {e}") + if ( + DELETE_EXPERIMENT_ERRORS[1] in str(e) + or DELETE_EXPERIMENT_ERRORS[2] in str(e) + or DELETE_EXPERIMENT_ERRORS[3] in str(e) + ): + return True + return False + + def renew_slice(self, slice_name: str, duration: float): + try: + new_expiration = datetime.datetime.now() + datetime.timedelta(hours=duration) + self.context.cf.renewSlice(self.context, slice_name, new_expiration) + return True + except Exception 
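Note that the fallback loop above reorders the imported `PRIORITY_HARDWARE_TYPES` list in place, so the preference order leaks across calls and across every module that imports it. If that side effect is not intended, a non-mutating per-request ordering is straightforward (a sketch, not the author's implementation):

```python
def ordered_hardware_preferences(requested, priority_types):
    # Requested type first, then the remaining priorities, leaving the shared list untouched.
    return [requested] + [t for t in priority_types if t != requested]
```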
as e: + if "Cannot shorten slice lifetime" in str(e): + logger.info(f"Slice '{slice_name}' already has sufficient lifetime") + return True + logger.error(f"Error: {e}") + return False + + def renew_sliver(self, slice_name: str, aggregate_name: str, duration: float): + try: + aggregate = self.get_aggregate(aggregate_name) + new_expiration = datetime.datetime.now() + datetime.timedelta(hours=duration) + aggregate.renewsliver(self.context, slice_name, new_expiration) + return True + except Exception as e: + logger.error(f"Error: {e}") + return False + + def get_sliver_status(self, slice_name: str, aggregate_name: str): + try: + aggregate = self.get_aggregate(aggregate_name) + sliver_info = aggregate.listresources(self.context, slice_name) + return sliver_info + except Exception as e: + logger.error(f"Error: {e}") + return None + + def get_sliver_spec(self, slice_name: str, aggregate_name: str): + try: + aggregate = self.get_aggregate(aggregate_name) + sliver_spec = aggregate.sliverstatus(self.context, slice_name) + return sliver_spec + except Exception as e: + logger.error(f"Error: {e}") + return None + + def print_experiment_spec(self, slice_name: str, aggregate_name: str): + sliver_spec = self.get_sliver_spec(slice_name, aggregate_name) + parsed_sliver_spec = parse_sliver_info(sliver_spec.text) + try: + print("\nExperiment Information:") + print(f"Description: {parsed_sliver_spec['description']}") + print(f"Expiration: {parsed_sliver_spec['expiration']}") + + print("\nNodes:") + for node in parsed_sliver_spec["nodes"]: + print(f"\nNode: {node['client_id']}") + print(f" Hostname: {node['hostname']}") + print(f" Public IP: {node['public_ip']}") + print(f" Internal IP: {node['internal_ip']}") + print(f" Hardware: {node['hardware']}") + print(f" OS Image: {node['os_image']}") + + print("\nLocation:") + print(f" Country: {parsed_sliver_spec['location']['country']}") + print(f" Latitude: {parsed_sliver_spec['location']['latitude']}") + print(f" Longitude: {parsed_sliver_spec['location']['longitude']}") + except Exception as e: + logger.error(f"Error: {e}") + return None + + def list_slices(self): + try: + slices = self.context.cf.listSlices(self.context) + return slices + except Exception as e: + logger.error(f"Error: {e}") + return None + + def are_nodes_ready(self, slice_name: str, aggregate_name: str) -> bool: + try: + aggregate = self.get_aggregate(aggregate_name) + sliver_status = aggregate.sliverstatus(self.context, slice_name) + resources = sliver_status.get("geni_resources", []) + return all(resource.get("pg_status") == "ready" for resource in resources) + except Exception as e: + logger.error(f"Error: {e}") + raise e diff --git a/provisioner/config/__init__.py b/provisioner/config/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/provisioner/config/settings.py b/provisioner/config/settings.py new file mode 100644 index 0000000..a67cd31 --- /dev/null +++ b/provisioner/config/settings.py @@ -0,0 +1,93 @@ +import os + +from dotenv import load_dotenv +from geni.aggregate.cloudlab import Clemson, Utah, Wisconsin + +load_dotenv(override=True) + +SET_TEST_VALUES = False + + +# Default settings +class DefaultSettings: + #### Default Settings #### + DEFAULT_HARDWARE_TYPE = "c220g5" if not SET_TEST_VALUES else "m510" + DEFAULT_OS_TYPE = "UBUNTU22-64-STD" + DEFAULT_NODE_COUNT = 3 if not SET_TEST_VALUES else 1 + DEFAULT_DURATION_HOURS = 16 if not SET_TEST_VALUES else 0.05 + DEFAULT_DESCRIPTION = "Cloudlab Experiment" + + MIN_AVAILABLE_CLUSTERS = 2 if not SET_TEST_VALUES else 1 + 
MAX_TOTAL_CLUSTERS = 8 if not SET_TEST_VALUES else 2 + MAX_CLUSTERS_PER_USER = 2 if not SET_TEST_VALUES else 1 + UNCLAIMED_CLUSTER_TIMEOUT_HOURS = 16 if not SET_TEST_VALUES else 1 + CLAIMED_CLUSTER_DEFAULT_DURATION_HOURS = 100 if not SET_TEST_VALUES else 0.1 + CLAIMED_CLUSTER_INACTIVITY_TIMEOUT_HOURS = 48 if not SET_TEST_VALUES else 0.05 + CLAIMED_CLUSTER_EXTENSION_CHECK_HOURS = 24 if not SET_TEST_VALUES else 0.025 + + DATABASE_PATH = "database.sqlite3" + + DEFAULT_SSH_TIME_OUT_SECONDS = 30 # 30 + + LOG_PATH = "logs/" + + #### Provisioner Credentials #### + PROVISIONER_DEFAULT_SSH_USERNAME = "sregym" + PROVISIONER_SSH_PRIVATE_KEY_PATH = os.getenv("PROVISIONER_SSH_PRIVATE_KEY_PATH") + + #### Daemon Settings #### + SCHEDULER_INTERVAL_MINUTES = 5 + + #### SREGym Settings #### + DEFAULT_POD_NETWORK_CIDR = "192.168.0.0/16" + DEPLOY_KEY_PATH = os.getenv("DEPLOY_KEY_PATH") + + +CLOUD_LAB_CONTEXT_JSON = { + "framework": "emulab-ch2", + "cert-path": os.getenv("CLOUDLAB_CERT_PATH"), + "key-path": os.getenv("CLOUDLAB_KEY_PATH"), + "user-name": "sregym", + "user-urn": "urn:publicid:IDN+emulab.net+user+sregym", + "user-pubkeypath": os.getenv("PROVISIONER_SSH_PUBLIC_KEY_PATH"), + "project": os.getenv("CLOUD_PROJECT_NAME"), +} + +# Aggregates mapping +AGGREGATES_MAP = { + "clemson": Clemson, + "utah": Utah, + "wisconsin": Wisconsin, + "cloudlab clemson": Clemson, + "cloudlab utah": Utah, + "cloudlab wisconsin": Wisconsin, + "cl-clemson": Clemson, + "cl-wisconsin": Wisconsin, + "cl-utah": Utah, +} + +# Hardware types +PRIORITY_HARDWARE_TYPES = ["c220g5", "c220g4", "c220g3", "c220g2", "c220g1"] + +# OS types +OS_TYPES = [ + "UBUNTU22-64-STD", + "UBUNTU20-64-STD", + "UBUNTU18-64-STD", + "UBUNTU16-64-STD", + "DEBIAN11-64-STD", + "DEBIAN10-64-STD", + "FEDORA36-64-STD", + "CENTOS7-64-STD", + "CENTOS8-64-STD", + "RHEL8-64-STD", +] + +# The first error means deletion not successful have to retry +# The second error means experiment does not exist maybe already deleted and no need to retry +DELETE_EXPERIMENT_ERRORS = [ + "resource is busy; try again later", # -> retry + "No such slice here", # -> no need to retry, + "get_credentials encountered an error requesting the slice credential: No such Slice", # -> no need to retry, already expired + "expired on", # -> no need to retry, already expired +] diff --git a/provisioner/daemon.py b/provisioner/daemon.py new file mode 100755 index 0000000..7f186e4 --- /dev/null +++ b/provisioner/daemon.py @@ -0,0 +1,505 @@ +import datetime +import signal +import subprocess +import threading +import time +from typing import Optional + +from apscheduler.schedulers.blocking import BlockingScheduler +from apscheduler.triggers.interval import IntervalTrigger + +from provisioner.cloudlab_provisioner import CloudlabProvisioner +from provisioner.config.settings import DefaultSettings +from provisioner.state_manager import CLUSTER_STATUS, SREGYM_STATUS, StateManager +from provisioner.utils.email_sender import EmailSender +from provisioner.utils.logger import logger +from provisioner.utils.ssh import SSHManager +from scripts.geni_lib.cluster_setup import setup_cloudlab_cluster_with_sregym + +# Global stop event for graceful shutdown +stop_event = threading.Event() + + +class ProvisionerDaemon: + def __init__(self): + logger.info("Initializing Provisioner Daemon...") + self.state_manager = StateManager(db_path=DefaultSettings.DATABASE_PATH) + self.cloudlab = CloudlabProvisioner() + + self.scheduler = BlockingScheduler() + logger.info("Provisioner Daemon initialized.") + + def 
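`delete_experiment` above treats a subset of `DELETE_EXPERIMENT_ERRORS` as 'already gone' by matching entries `[1]`, `[2]` and `[3]` as substrings of the raised exception. Indexing positionally is easy to knock out of sync when entries are added; a helper that names the intent could look like this sketch, under the assumption that every entry after the first means the sliver no longer exists:

```python
from provisioner.config.settings import DELETE_EXPERIMENT_ERRORS

# Everything after "resource is busy" signals the sliver is already gone or expired,
# so deletion can be reported as successful without a retry.
BENIGN_DELETE_ERRORS = DELETE_EXPERIMENT_ERRORS[1:]

def is_benign_delete_error(exc: Exception) -> bool:
    message = str(exc)
    return any(marker in message for marker in BENIGN_DELETE_ERRORS)
```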
_get_ssh_manager( + self, hostname: str, port: int = 22, timeout: int = DefaultSettings.DEFAULT_SSH_TIME_OUT_SECONDS + ) -> SSHManager: + """ + Create an SSHManager instance for a given host. + """ + return SSHManager( + hostname=hostname, + username=DefaultSettings.PROVISIONER_DEFAULT_SSH_USERNAME, + private_key_path=DefaultSettings.PROVISIONER_SSH_PRIVATE_KEY_PATH, + port=port, + timeout=timeout, + ) + + def check_automatic_provisioning(self): + logger.info("Running: Automatic Provisioning Check") + try: + effective_pool_size = self.state_manager.count_total_available_clusters() + needed = DefaultSettings.MIN_AVAILABLE_CLUSTERS - effective_pool_size + + logger.info(f"Pool Status: EffectivePool={effective_pool_size}. Needed={needed}") + + for _ in range(max(0, needed)): + current_total_managed = self.state_manager.count_total_managed_clusters() + if current_total_managed >= DefaultSettings.MAX_TOTAL_CLUSTERS: + logger.warning( + f"Max total clusters ({DefaultSettings.MAX_TOTAL_CLUSTERS}) reached. Cannot auto-provision more." + ) + break + + logger.info(f"Attempting to auto-provision a new cluster. Current total: {current_total_managed}") + slice_name = self.cloudlab.generate_slice_name() + + # Record intention to provision + self.state_manager.create_cluster_record( + slice_name=slice_name, + aggregate_name="", + # hardware_type=DefaultSettings.DEFAULT_HARDWARE_TYPE, + os_type=DefaultSettings.DEFAULT_OS_TYPE, + node_count=DefaultSettings.DEFAULT_NODE_COUNT, + status=CLUSTER_STATUS.STATUS_AUTO_PROVISIONING, + ) + + experiment_info = None + + try: + experiment_info = self.cloudlab.create_experiment( + slice_name=slice_name, + hardware_type=DefaultSettings.DEFAULT_HARDWARE_TYPE, + os_type=DefaultSettings.DEFAULT_OS_TYPE, + node_count=DefaultSettings.DEFAULT_NODE_COUNT, + duration=DefaultSettings.UNCLAIMED_CLUSTER_TIMEOUT_HOURS, + ) + + if experiment_info and experiment_info.get("login_info"): + control_node_info = next((n for n in experiment_info["login_info"] if n[0] == "control"), None) + if not control_node_info: + raise ValueError("Control node info not found in login_info") + + hostname = control_node_info[2] + expires_at = datetime.datetime.now() + datetime.timedelta(hours=experiment_info["duration"]) + + self.state_manager.update_cluster_record( + slice_name, + aggregate_name=experiment_info["aggregate_name"], + hardware_type=experiment_info["hardware_type"], + control_node_hostname=hostname, + login_info=experiment_info["login_info"], + cloudlab_expires_at=expires_at, + # Status remains PROVISIONING until SREGym setup + ) + logger.info(f"Cluster {slice_name} provisioned by Cloudlab. Host: {hostname}") + + try: + while not self.cloudlab.are_nodes_ready(slice_name, experiment_info["aggregate_name"]): + logger.info(f"Waiting for nodes to be ready for {slice_name} on {hostname}...") + time.sleep(10) + logger.info(f"Nodes are ready for {slice_name} on {hostname}.") + except Exception as e: + logger.error(f"Error: {e}") + self.state_manager.update_cluster_record( + slice_name, status=CLUSTER_STATUS.STATUS_ERROR, last_error_message=str(e) + ) + continue + + # NOTE: not setting up SREGym when auto provisioning rather when user claims a cluster + # self._setup_sregym_and_finalize(experiment_info) + + self.state_manager.update_cluster_record( + slice_name, status=CLUSTER_STATUS.STATUS_UNCLAIMED_READY + ) + + else: + err_msg = f"Failed to create experiment {slice_name} via Cloudlab." 
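The readiness wait above polls `are_nodes_ready` every 10 seconds with no upper bound, so a sliver that never converges stalls the whole provisioning cycle. A bounded variant might look like the sketch below (the 30-minute ceiling is an arbitrary choice, not taken from the patch):

```python
import time

def wait_for_nodes_ready(provisioner, slice_name, aggregate_name, timeout_s=1800, poll_s=10):
    # Poll until every node reports pg_status == "ready", or give up after timeout_s.
    deadline = time.monotonic() + timeout_s
    while time.monotonic() < deadline:
        if provisioner.are_nodes_ready(slice_name, aggregate_name):
            return True
        time.sleep(poll_s)
    return False
```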
+ logger.error(err_msg) + self.state_manager.update_cluster_record( + slice_name, + status=CLUSTER_STATUS.STATUS_ERROR, + last_error_message=err_msg, + ) + except Exception as e: + logger.error(f"Error during Cloudlab provisioning for {slice_name}: {e}", exc_info=True) + self.state_manager.update_cluster_record( + slice_name, status=CLUSTER_STATUS.STATUS_ERROR, last_error_message=str(e) + ) + + # If was provisioned, delete the cluster + if experiment_info and experiment_info.get("aggregate_name"): + self.cloudlab.delete_experiment(slice_name, experiment_info["aggregate_name"]) + except Exception as e: + logger.error(f"Critical error in automatic provisioning check: {e}", exc_info=True) + + def _setup_sregym_and_finalize(self, experiment_info: dict): + """ + Setup SREGym and finalize cluster state. + """ + try: + slice_name = experiment_info["slice_name"] + login_info = experiment_info["login_info"] + + hosts = [info[2] for info in login_info] + + cfg = { + "cloudlab": { + "ssh_user": DefaultSettings.PROVISIONER_DEFAULT_SSH_USERNAME, + "ssh_key": DefaultSettings.PROVISIONER_SSH_PRIVATE_KEY_PATH, + "nodes": hosts, + }, + "pod_network_cidr": DefaultSettings.DEFAULT_POD_NETWORK_CIDR, + "deploy_sregym": True, + "deploy_key": DefaultSettings.DEPLOY_KEY_PATH, + } + + setup_cloudlab_cluster_with_sregym(cfg) + + logger.info(f"SREGym setup for {slice_name} completed successfully.") + + self.state_manager.update_cluster_record( + slice_name, + status=CLUSTER_STATUS.STATUS_UNCLAIMED_READY, + sregym_setup_status=SREGYM_STATUS.SREGYM_SUCCESS, + ) + except Exception as e: + logger.error(f"Error during SREGym setup for {slice_name}: {e}", exc_info=True) + self.state_manager.update_cluster_record( + slice_name, + status=CLUSTER_STATUS.STATUS_ERROR, + sregym_setup_status=SREGYM_STATUS.SREGYM_FAILED, + last_error_message="SREGym setup failed", + ) + raise e + + def check_unclaimed_cluster_timeout(self): + logger.info("Running: Unclaimed Cluster Timeout Check") + try: + unclaimed_clusters = self.state_manager.get_clusters_by_status(CLUSTER_STATUS.STATUS_UNCLAIMED_READY) + now = datetime.datetime.now() + + for cluster in unclaimed_clusters: + slice_name = cluster["slice_name"] + + created_at = cluster["created_at"] + + if not isinstance(created_at, datetime.datetime): + created_at = datetime.datetime.fromisoformat(str(created_at)) + + if now - created_at > datetime.timedelta(hours=DefaultSettings.UNCLAIMED_CLUSTER_TIMEOUT_HOURS): + logger.info( + f"Unclaimed cluster {slice_name} (in pool since {created_at}) has timed out. Marking for termination." + ) + # Always mark for termination. Auto-provisioning will handle replenishment. + self.state_manager.update_cluster_record(slice_name, status=CLUSTER_STATUS.STATUS_TERMINATING) + else: + logger.debug( + f"Unclaimed cluster {slice_name} (in pool since {created_at}) is within timeout window." 
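The expiry decision inside `check_unclaimed_cluster_timeout` mixes datetime coercion with the comparison; pulled out as a pure function it becomes trivially unit-testable. An illustrative refactoring, with the default mirroring `UNCLAIMED_CLUSTER_TIMEOUT_HOURS`:

```python
import datetime

def unclaimed_cluster_expired(created_at, now=None, timeout_hours=16):
    # created_at may arrive as a datetime or an ISO string, as in the check above.
    if not isinstance(created_at, datetime.datetime):
        created_at = datetime.datetime.fromisoformat(str(created_at))
    now = now or datetime.datetime.now()
    return now - created_at > datetime.timedelta(hours=timeout_hours)
```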
+ ) + except Exception as e: + logger.error(f"Critical error in unclaimed cluster timeout check: {e}", exc_info=True) + + # The provisioner should extend the cluster daily until the user reliquishing timeout + def check_claimed_cluster_extension(self): + logger.info("Running: Claimed Cluster Extension Check") + try: + claimed_clusters = self.state_manager.get_clusters_by_status(CLUSTER_STATUS.STATUS_CLAIMED) + now = datetime.datetime.now() + for cluster in claimed_clusters: + # Check if we need to extend based on last extension time + last_extended_at = cluster.get("last_extended_at") + if last_extended_at: + if not isinstance(last_extended_at, datetime.datetime): + last_extended_at = datetime.datetime.fromisoformat(str(last_extended_at)) + # If last extension was less than 24 hours ago, skip + if now - last_extended_at < datetime.timedelta( + hours=DefaultSettings.CLAIMED_CLUSTER_EXTENSION_CHECK_HOURS + ): + continue + + logger.info(f"Performing daily extension for claimed cluster {cluster['slice_name']}.") + new_duration_hours = DefaultSettings.CLAIMED_CLUSTER_DEFAULT_DURATION_HOURS + try: + if self.cloudlab.renew_experiment( + cluster["slice_name"], new_duration_hours, cluster["aggregate_name"] + ): + new_cloudlab_expires_at = now + datetime.timedelta(hours=new_duration_hours) + self.state_manager.update_cluster_record( + cluster["slice_name"], cloudlab_expires_at=new_cloudlab_expires_at, last_extended_at=now + ) + logger.info(f"Successfully extended {cluster['slice_name']} to {new_cloudlab_expires_at}.") + + else: + logger.error( + f"Failed to extend claimed cluster {cluster['slice_name']}. User should be notified." + ) + + try: + email_sender = EmailSender() + if email_sender.is_email_set(): + email_sender.send_cluster_extension_failure_notice( + to_addresses=[cluster["claimed_by_user_id"]], + cluster_name=cluster["slice_name"], + error_message="Failed to extend cluster", + current_expiry=cluster["cloudlab_expires_at"], + ) + except Exception as e: + logger.error(f"Error sending cluster extension failure notice: {e}", exc_info=True) + except Exception as e: + logger.error(f"Error extending claimed cluster {cluster['slice_name']}: {e}", exc_info=True) + + try: + email_sender = EmailSender() + if email_sender.is_email_set(): + email_sender.send_cluster_extension_failure_notice( + to_addresses=[cluster["claimed_by_user_id"]], + cluster_name=cluster["slice_name"], + error_message="Failed to extend cluster", + current_expiry=cluster["cloudlab_expires_at"], + ) + except Exception as e: + logger.error(f"Error sending cluster extension failure notice: {e}", exc_info=True) + except Exception as e: + logger.error(f"Critical error in claimed cluster extension check: {e}", exc_info=True) + + def _get_key_fingerprint(self, key_path: str) -> str: + result = subprocess.run(["ssh-keygen", "-lf", key_path], capture_output=True, text=True) + output = result.stdout.strip() + fingerprint = output.split()[1] # Get the SHA256:xxxxxxxx part + return fingerprint + + def _get_user_inactivity_duration(self, hostname: str) -> Optional[datetime.datetime]: + logger.info(f"Attempting to get actual last SSH time for {hostname}.") + try: + provisioner_fingerprint = self._get_key_fingerprint(self.cloudlab.user_pubkeypath) + + ssh_manager = self._get_ssh_manager(hostname) + + # Command to get SSH activity from remote auth.log with sudo + cmd = ( + "sudo cat /var/log/auth.log | grep sshd | grep 'Accepted publickey for' | awk '{print $1,$2,$3,$9,$16}'" + ) + stdout, stderr, exit_code = 
ssh_manager.execute_ssh_command(cmd) + + if exit_code != 0 or not stdout: + logger.warning(f"No SSH activity found for {hostname}. Exit code: {exit_code}, Error: {stderr}") + return None + + # Parse the timestamps from the log entries + provisioner_timestamps = [] + non_provisioner_timestamps = [] + + for line in stdout.splitlines(): + try: + parts = line.split() + if len(parts) >= 3: + # Combine month, day, and time + timestamp_str = " ".join(parts[:3]) + timestamp = datetime.datetime.strptime(timestamp_str, "%b %d %H:%M:%S") + # Add current year since log entries don't include it + timestamp = timestamp.replace(year=datetime.datetime.now().year) + + # Check if this is a provisioner SSH + if provisioner_fingerprint in line: + provisioner_timestamps.append(timestamp) + else: + non_provisioner_timestamps.append(timestamp) + + except Exception as e: + logger.warning(f"Failed to parse timestamp from line: {line}, error: {e}") + continue + + # Since we just SSH'd in with provisioner key, the the latest provisioner time is the current time + current_time = provisioner_timestamps[-1] + + if not provisioner_timestamps: + logger.warning(f"No provisioner SSH activity found for {hostname}") + return None + + # Case 1: If we have non-provisioner SSH activity + if non_provisioner_timestamps: + last_non_provisioner = max(non_provisioner_timestamps) + time_diff = current_time - last_non_provisioner + logger.info(f"Last non-provisioner SSH was {time_diff.total_seconds()/3600:.2f} hours ago") + return time_diff + + # Case 2: If no non-provisioner SSH activity, use first provisioner time + else: + time_diff = current_time - provisioner_timestamps[0] + logger.info( + f"No non-provisioner SSH found. First provisioner SSH was {time_diff.total_seconds()/3600:.2f} hours ago" + ) + return time_diff + + except Exception as e: + logger.error(f"Error getting SSH time for {hostname}: {e}", exc_info=True) + return None + + def check_claimed_cluster_inactivity(self): + logger.info("Running: Claimed Cluster Inactivity Check") + try: + claimed_clusters = self.state_manager.get_clusters_by_status(CLUSTER_STATUS.STATUS_CLAIMED) + now = datetime.datetime.now() + for cluster in claimed_clusters: + slice_name = cluster["slice_name"] + if cluster.get("evaluation_override") in (True, 1): + logger.debug(f"Cluster {slice_name} has evaluation override. Skipping inactivity check.") + continue + + # Get latest duration from all nodes + node_durations = [] + for node in cluster["login_info"]: + hostname = node[2] + node_durations.append(self._get_user_inactivity_duration(hostname)) + + # Get the latest duration + user_inactivity_duration = min(node_durations) + + if user_inactivity_duration is None: + logger.warning(f"No user inactivity duration found for {slice_name}. Skipping inactivity check.") + continue + + self.state_manager.update_cluster_record(slice_name, last_activity_at=now - user_inactivity_duration) + + if user_inactivity_duration > datetime.timedelta( + hours=DefaultSettings.CLAIMED_CLUSTER_INACTIVITY_TIMEOUT_HOURS + ): + logger.info(f"Claimed cluster {slice_name} inactive for {user_inactivity_duration}. 
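The auth.log entries carry no year, and the parser above pins every timestamp to the current year, which misdates December logins that are read in January. A parsing sketch with a simple rollback rule (the assumption being that any parsed time in the future must belong to the previous year):

```python
import datetime

def parse_authlog_timestamp(month_day_time, now=None):
    # month_day_time is the syslog-style prefix, e.g. "Dec 31 23:59:01".
    now = now or datetime.datetime.now()
    ts = datetime.datetime.strptime(month_day_time, "%b %d %H:%M:%S").replace(year=now.year)
    if ts > now:
        ts = ts.replace(year=now.year - 1)
    return ts

print(parse_authlog_timestamp("Dec 31 23:59:01", now=datetime.datetime(2025, 1, 2)))
# -> 2024-12-31 23:59:01
```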
Relinquishing.") + self.state_manager.update_cluster_record( + slice_name, + status=CLUSTER_STATUS.STATUS_TERMINATING, + claimed_by_user_id=None, + user_ssh_key_installed=False, + ) + + try: + email_sender = EmailSender() + if email_sender.is_email_set(): + email_sender.send_inactive_cluster_deletion_notice( + to_addresses=[cluster["claimed_by_user_id"]], + cluster_name=cluster["slice_name"], + last_activity=now - user_inactivity_duration, + ) + logger.info( + f"Sent inactive cluster deletion notice to {cluster['claimed_by_user_id']} for cluster {slice_name}" + ) + except Exception as e: + logger.error(f"Error sending inactive cluster deletion notice: {e}", exc_info=True) + else: + logger.debug( + f"Cluster {slice_name} last activity at {now - user_inactivity_duration} is within inactivity window." + ) + except Exception as e: + logger.error(f"Critical error in claimed cluster inactivity check: {e}", exc_info=True) + + def process_terminating_clusters(self): + logger.info("Running: Process Terminating Clusters") + try: + terminating_clusters = self.state_manager.get_clusters_by_status(CLUSTER_STATUS.STATUS_TERMINATING) + + for cluster in terminating_clusters: + slice_name = cluster["slice_name"] + aggregate_name = cluster["aggregate_name"] + logger.info(f"Attempting to terminate cluster {slice_name} on {aggregate_name}.") + try: + if not aggregate_name or aggregate_name == "": + logger.warning( + f"Cannot terminate {slice_name}, aggregate_name is unknown ('{aggregate_name}'). Deleting DB record only." + ) + self.state_manager.delete_cluster_record(slice_name) + continue + + if self.cloudlab.delete_experiment(slice_name, aggregate_name): + logger.info(f"Successfully deleted experiment {slice_name} from Cloudlab.") + self.state_manager.delete_cluster_record(slice_name) + logger.info(f"Removed cluster record for {slice_name}.") + else: + err_msg = f"Cloudlab API failed to delete {slice_name}. Will retry on next check." + logger.error(err_msg) + self.state_manager.update_cluster_record( + slice_name, last_error_message=err_msg, status=CLUSTER_STATUS.STATUS_TERMINATING + ) + except Exception as e: + err_msg = f"Error deleting {slice_name} from Cloudlab: {e}" + logger.error(err_msg + ". 
Will retry on next check.", exc_info=True) + self.state_manager.update_cluster_record( + slice_name, last_error_message=err_msg, status=CLUSTER_STATUS.STATUS_TERMINATING + ) + + except Exception as e: + logger.error(f"Critical error in processing terminating clusters: {e}", exc_info=True) + + def run_all_checks(self): + """Runs all periodic checks in sequence.""" + if stop_event.is_set(): + logger.info("Stop event received by run_all_checks, skipping scheduled run.") + return + + logger.info("======== Starting Periodic Checks Cycle ========") + try: + self.check_unclaimed_cluster_timeout() + self.check_claimed_cluster_inactivity() + self.check_automatic_provisioning() + self.check_claimed_cluster_extension() + self.process_terminating_clusters() + except Exception as e: + logger.critical(f"Unhandled exception during periodic checks cycle: {e}", exc_info=True) + logger.info("======== Finished Periodic Checks Cycle ========") + + def start(self): + logger.info("Starting Provisioner Daemon Scheduler...") + # Run once immediately at start, then schedule + try: + logger.info("Performing initial run of all checks...") + self.run_all_checks() + logger.info("Initial run of checks complete.") + except Exception as e: + logger.critical(f"Initial run of checks failed critically: {e}", exc_info=True) + + # Schedule jobs + self.scheduler.add_job( + self.run_all_checks, + trigger=IntervalTrigger(seconds=DefaultSettings.DEFAULT_SSH_TIME_OUT_SECONDS), + id="provisioner_main_checks_job", + name="Run all provisioner checks", + replace_existing=True, + misfire_grace_time=300, + max_instances=1, + ) + + try: + self.scheduler.start() + except (KeyboardInterrupt, SystemExit): + logger.info("Scheduler stopped by user/system.") + finally: + if self.scheduler.running: + logger.info("Shutting down scheduler...") + self.scheduler.shutdown(wait=True) + logger.info("Provisioner Daemon scheduler shut down.") + + # --- Signal Handler and Main Execution --- + _scheduler_instance = None + + def signal_handler(signum, frame): + global _scheduler_instance + logger.info(f"Signal {signal.Signals(signum).name} received, initiating graceful shutdown...") + stop_event.set() + if _scheduler_instance and _scheduler_instance.running: + logger.info("Requesting scheduler shutdown...") + _scheduler_instance.shutdown(wait=False) + else: + logger.info("Scheduler not running or not initialized for signal handler.") + + +if __name__ == "__main__": + daemon = ProvisionerDaemon() + daemon.start() diff --git a/provisioner/setup_daemon.sh b/provisioner/setup_daemon.sh new file mode 100755 index 0000000..39e8c8d --- /dev/null +++ b/provisioner/setup_daemon.sh @@ -0,0 +1,57 @@ +#!/bin/bash + +# Exit on error +set -e + +# Check if running as root +if [ "$EUID" -ne 0 ]; then + echo "Please run as root" + exit 1 +fi + +# Get the absolute path of the provisioner directory +PROVISIONER_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +ROOT_DIR=$(dirname "$PROVISIONER_DIR") +VENV_PATH="$ROOT_DIR/.venv" + +# Create the service file +echo "Creating systemd service file..." 
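`signal_handler` and `stop_event` are defined in daemon.py, but this hunk does not show the handler being registered before the scheduler starts, and the systemd unit below stops the service with SIGTERM. Registration would typically be wired up as in the sketch below; where it is actually installed in the final code is not shown here:

```python
import signal

def install_signal_handlers(handler):
    # Route Ctrl-C and systemd's SIGTERM through the daemon's graceful-shutdown path.
    signal.signal(signal.SIGINT, handler)
    signal.signal(signal.SIGTERM, handler)

# e.g. in daemon.py's __main__ block, before daemon.start():
#     install_signal_handlers(signal_handler)
```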
+cat > /etc/systemd/system/provisioner.service << EOF +[Unit] +Description=Cloudlab Provisioner Daemon +After=network.target + +[Service] +Type=simple +WorkingDirectory=$PROVISIONER_DIR +ExecStart=$VENV_PATH/bin/python $PROVISIONER_DIR/daemon.py +Restart=on-failure +RestartSec=5 +Environment=PYTHONPATH=$ROOT_DIR:$PROVISIONER_DIR + +[Install] +WantedBy=multi-user.target +EOF + +# Set proper permissions +chmod 644 /etc/systemd/system/provisioner.service + +# Reload systemd to recognize new service +echo "Reloading systemd..." +systemctl daemon-reload + +# Stop if already running +echo "Stopping service..." +systemctl stop provisioner.service + +# Start the service +echo "Starting service..." +systemctl start provisioner.service + +# Check status +echo "Checking service status..." +systemctl status provisioner.service + +echo "Setup complete! The provisioner daemon should now be running." +echo "You can check the logs using:" +echo " - journalctl -u provisioner.service -f" \ No newline at end of file diff --git a/provisioner/state_manager.py b/provisioner/state_manager.py new file mode 100644 index 0000000..bbade48 --- /dev/null +++ b/provisioner/state_manager.py @@ -0,0 +1,364 @@ +import datetime +import json +import sqlite3 +from pathlib import Path +from typing import Any, Dict, List, Optional + +from provisioner.utils.logger import logger + + +class CLUSTER_STATUS: + STATUS_AUTO_PROVISIONING = "auto_provisioning" + STATUS_USER_PROVISIONING = "user_provisioning" + STATUS_UNCLAIMED_READY = "unclaimed_ready" + STATUS_CLAIMED = "claimed" + # STATUS_PENDING_CLEANUP = "pending_cleanup" + STATUS_TERMINATING = "terminating" + STATUS_ERROR = "error" + STATUS_TERMINATED = "terminated" + + +class SREGYM_STATUS: + SREGYM_PENDING = "pending" + SREGYM_SUCCESS = "success" + SREGYM_FAILED = "failed" + SREGYM_NOT_ATTEMPTED = "not_attempted" + + +class StateManager: + def __init__(self, db_path: str): + self.db_path = Path(db_path) + self._init_db() + + def _get_db_connection(self) -> sqlite3.Connection: + conn = sqlite3.connect(self.db_path, detect_types=sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES) + conn.row_factory = sqlite3.Row + conn.execute("PRAGMA foreign_keys = ON;") + return conn + + def _init_db(self): + conn = self._get_db_connection() + try: + with self._get_db_connection() as conn: + cursor = conn.cursor() + + # Users Table + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS users ( + user_id TEXT PRIMARY KEY, + ssh_public_key TEXT NOT NULL, + registered_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP + )""" + ) + + # Clusters Table + cursor.execute( + """ + CREATE TABLE IF NOT EXISTS clusters ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + slice_name TEXT UNIQUE NOT NULL, + aggregate_name TEXT, + status TEXT NOT NULL DEFAULT 'auto_provisioning', + hardware_type TEXT, + os_type TEXT, + node_count INTEGER, + login_info TEXT, + control_node_hostname TEXT, + claimed_by_user_id TEXT, + user_ssh_key_installed BOOLEAN DEFAULT FALSE, + sregym_setup_status TEXT DEFAULT 'not_attempted', + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + claimed_at TIMESTAMP, + last_extended_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + last_activity_at TIMESTAMP, + cloudlab_expires_at TIMESTAMP, + evaluation_override BOOLEAN DEFAULT FALSE, + last_error_message TEXT, + FOREIGN KEY (claimed_by_user_id) REFERENCES users(user_id) ON DELETE SET NULL + ) + """ + ) + conn.commit() + logger.info(f"Database initialized/checked at {self.db_path}") + except sqlite3.Error as e: + logger.error(f"Database initialization error: {e}", 
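`_get_db_connection` opens SQLite with `PARSE_DECLTYPES`, which is what lets values stored in the `TIMESTAMP` columns come back as `datetime` objects instead of strings (the daemon's checks still fall back to `fromisoformat` when they do not). A minimal, standard-library-only round trip demonstrating that behaviour (the built-in datetime adapters are deprecated as of Python 3.12 but still work):

```python
import datetime
import sqlite3

conn = sqlite3.connect(":memory:", detect_types=sqlite3.PARSE_DECLTYPES)
conn.execute("CREATE TABLE t (created_at TIMESTAMP)")
conn.execute("INSERT INTO t VALUES (?)", (datetime.datetime.now(),))
value = conn.execute("SELECT created_at FROM t").fetchone()[0]
print(type(value))  # <class 'datetime.datetime'>
```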
exc_info=True) + raise e + + # --- User Management --- + def add_user(self, user_id: str, ssh_public_key: str) -> bool: + try: + with self._get_db_connection() as conn: + conn.execute("INSERT INTO users (user_id, ssh_public_key) VALUES (?, ?)", (user_id, ssh_public_key)) + conn.commit() + logger.info(f"User {user_id} registered.") + return True + except sqlite3.IntegrityError: + logger.warning(f"User {user_id} already exists.") + return False + except sqlite3.Error as e: + logger.error(f"Error adding user {user_id}: {e}", exc_info=True) + return False + + def get_user(self, user_id: str) -> Optional[Dict[str, Any]]: + try: + with self._get_db_connection() as conn: + cursor = conn.execute("SELECT * FROM users WHERE user_id = ?", (user_id,)) + row = cursor.fetchone() + return dict(row) if row else None + except sqlite3.Error as e: + logger.error(f"Error getting user {user_id}: {e}", exc_info=True) + raise e + + def user_exists(self, user_id: str) -> bool: + return self.get_user(user_id) is not None + + # --- Cluster Management --- + def create_cluster_record( + self, + slice_name: str, + aggregate_name: str, + os_type: str, + node_count: int, + hardware_type: Optional[str] = None, + login_info: Optional[List[List[Any]]] = None, + status: str = CLUSTER_STATUS.STATUS_AUTO_PROVISIONING, + cloudlab_expires_at: Optional[datetime.datetime] = None, + claimed_by_user_id: Optional[str] = None, + evaluation_override: bool = False, + last_extended_at: Optional[datetime.datetime] = None, + ) -> Optional[str]: + """Creates a new cluster record. Returns slice_name on success.""" + now = datetime.datetime.now() + login_info_json = json.dumps(login_info) if login_info else None + try: + with self._get_db_connection() as conn: + conn.execute( + """ + INSERT INTO clusters (slice_name, aggregate_name, status, hardware_type, os_type, node_count, + created_at, last_activity_at, cloudlab_expires_at, claimed_by_user_id, + evaluation_override, login_info, last_extended_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
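For orientation, a short usage sketch of the StateManager API defined in this file; the import path matches the one daemon.py uses, while the database path, email address, and SSH key are placeholders:

```python
from provisioner.state_manager import CLUSTER_STATUS, StateManager

sm = StateManager(db_path="demo.sqlite3")                      # placeholder path
sm.add_user("alice@example.com", "ssh-ed25519 AAAA... alice")  # placeholder user/key
sm.create_cluster_record(
    slice_name="test-123456",
    aggregate_name="cloudlab wisconsin",
    os_type="UBUNTU22-64-STD",
    node_count=3,
    status=CLUSTER_STATUS.STATUS_UNCLAIMED_READY,
)
print([c["slice_name"] for c in sm.get_unclaimed_ready_clusters()])
```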
+ """, + ( + slice_name, + aggregate_name, + status, + hardware_type, + os_type, + node_count, + now, + now, + cloudlab_expires_at, + claimed_by_user_id, + evaluation_override, + login_info_json, + last_extended_at or now, + ), + ) + conn.commit() + logger.info(f"Cluster record created for {slice_name} with status {status}.") + return slice_name + except sqlite3.IntegrityError: + logger.error(f"Cluster with slice_name {slice_name} already exists.", exc_info=True) + raise sqlite3.IntegrityError(f"Cluster with slice_name {slice_name} already exists.") + except sqlite3.Error as e: + logger.error(f"Error creating cluster record for {slice_name}: {e}", exc_info=True) + raise e + + def get_cluster_by_slice_name(self, slice_name: str) -> Optional[Dict[str, Any]]: + try: + with self._get_db_connection() as conn: + cursor = conn.execute("SELECT * FROM clusters WHERE slice_name = ?", (slice_name,)) + row = cursor.fetchone() + + if row: + cluster_data = dict(row) + if cluster_data.get("login_info"): + try: + cluster_data["login_info"] = json.loads(cluster_data["login_info"]) + except json.JSONDecodeError: + logger.error(f"Invalid JSON in login_info for cluster {slice_name}.") + cluster_data["login_info"] = None + return cluster_data + else: + return None + + except sqlite3.Error as e: + logger.error(f"Error getting cluster {slice_name}: {e}", exc_info=True) + raise e + + def update_cluster_record(self, slice_name: str, **kwargs) -> bool: + if not kwargs: + logger.warning(f"No fields provided to update for cluster {slice_name}.") + raise ValueError("No fields provided to update for cluster {slice_name}.") + + valid_keys = [ + # "slice_name", + "aggregate_name", + "hardware_type", + # "os_type", + # "node_count", + "status", + "control_node_hostname", + "login_info", + "claimed_by_user_id", + "user_ssh_key_installed", + "sregym_setup_status", + "claimed_at", + "last_extended_at", + "last_activity_at", + "cloudlab_expires_at", + "evaluation_override", + "last_error_message", + "created_at", + ] + + set_clauses = [] + values = [] + for key, value in kwargs.items(): + if key not in valid_keys: + logger.error(f"Invalid field '{key}' for cluster update.") + return False + + if key == "login_info" and value is not None: + value = json.dumps(value) + + set_clauses.append(f"{key} = ?") + values.append(value) + + values.append(slice_name) + sql = f"UPDATE clusters SET {', '.join(set_clauses)} WHERE slice_name = ?" + + try: + with self._get_db_connection() as conn: + conn.execute(sql, tuple(values)) + conn.commit() + logger.info(f"Cluster {slice_name} updated with: {kwargs}") + except sqlite3.Error as e: + logger.error(f"Error updating cluster {slice_name}: {e}", exc_info=True) + raise e + + def delete_cluster_record(self, slice_name: str, soft_delete: bool = True) -> bool: + try: + with self._get_db_connection() as conn: + if soft_delete: + cursor = conn.execute( + "UPDATE clusters SET status = ? 
WHERE slice_name = ?", + (CLUSTER_STATUS.STATUS_TERMINATED, slice_name), + ) + else: + cursor = conn.execute("DELETE FROM clusters WHERE slice_name = ?", (slice_name,)) + conn.commit() + if cursor.rowcount > 0: + logger.info(f"Cluster record {slice_name} deleted.") + else: + logger.warning(f"No cluster record found to delete for {slice_name}.") + except sqlite3.Error as e: + logger.error(f"Error deleting cluster record {slice_name}: {e}", exc_info=True) + raise e + + # --- Specific Queries --- + + def _parse_cluster_row(self, row: sqlite3.Row) -> Dict[str, Any]: + """Helper to convert a row to dict and parse login_info.""" + if not row: + return {} + cluster_data = dict(row) + if cluster_data.get("login_info"): + try: + cluster_data["login_info"] = json.loads(cluster_data["login_info"]) + except json.JSONDecodeError: + logger.warning(f"Failed to parse login_info for cluster ID {cluster_data.get('id')}") + cluster_data["login_info"] = None + return cluster_data + + def get_clusters_by_status(self, status: str) -> List[Dict[str, Any]]: + try: + with self._get_db_connection() as conn: + cursor = conn.execute("SELECT * FROM clusters WHERE status = ?", (status,)) + rows = cursor.fetchall() + return [self._parse_cluster_row(row) for row in rows] + except sqlite3.Error as e: + logger.error(f"Error getting clusters with status {status}: {e}", exc_info=True) + raise e + + def get_unclaimed_ready_clusters(self) -> List[Dict[str, Any]]: + return self.get_clusters_by_status(CLUSTER_STATUS.STATUS_UNCLAIMED_READY) + + def get_claimed_clusters_by_user(self, user_id: str) -> List[Dict[str, Any]]: + try: + with self._get_db_connection() as conn: + cursor = conn.execute( + "SELECT * FROM clusters WHERE claimed_by_user_id = ? AND status = ?", + (user_id, CLUSTER_STATUS.STATUS_CLAIMED), + ) + rows = cursor.fetchall() + return [self._parse_cluster_row(row) for row in rows] + except sqlite3.Error as e: + logger.error(f"Error getting claimed clusters for user {user_id}: {e}", exc_info=True) + raise e + + def count_total_managed_clusters(self) -> int: + """Counts clusters that contribute to the MAX_TOTAL_CLUSTERS limit.""" + # These are clusters that are active or in a state that will soon become active/cleaned. + # Excludes clusters that are definitely gone or in a permanent error state. + managed_statuses = ( + CLUSTER_STATUS.STATUS_AUTO_PROVISIONING, + CLUSTER_STATUS.STATUS_USER_PROVISIONING, + CLUSTER_STATUS.STATUS_UNCLAIMED_READY, + CLUSTER_STATUS.STATUS_CLAIMED, + # CLUSTER_STATUS.STATUS_PENDING_CLEANUP, + ) + + try: + with self._get_db_connection() as conn: + cursor = conn.execute( + f"SELECT COUNT(*) FROM clusters WHERE status IN (? , ? , ? , ?)", managed_statuses + ) + count = cursor.fetchone()[0] + return count if count is not None else 0 + except sqlite3.Error as e: + logger.error(f"Error counting total managed clusters: {e}", exc_info=True) + raise e + + def count_total_available_clusters(self) -> int: + """Counts clusters that are available to be claimed.""" + available_statuses = ( + CLUSTER_STATUS.STATUS_AUTO_PROVISIONING, + # CLUSTER_STATUS.STATUS_USER_PROVISIONING, + CLUSTER_STATUS.STATUS_UNCLAIMED_READY, + # CLUSTER_STATUS.STATUS_PENDING_CLEANUP, + ) + + try: + with self._get_db_connection() as conn: + cursor = conn.execute(f"SELECT COUNT(*) FROM clusters WHERE status IN (? 
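`count_total_managed_clusters` hard-codes four placeholders to match the four statuses in the tuple, and the commented-out `STATUS_PENDING_CLEANUP` shows how easily the two can drift apart. Generating the placeholders from the tuple keeps them in sync (a sketch, not the patch's code):

```python
def count_clusters_in_statuses(conn, statuses):
    # Placeholder count always matches len(statuses), whatever the tuple contains.
    placeholders = ", ".join("?" for _ in statuses)
    sql = f"SELECT COUNT(*) FROM clusters WHERE status IN ({placeholders})"
    return conn.execute(sql, tuple(statuses)).fetchone()[0]
```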
, ?)", available_statuses) + count = cursor.fetchone()[0] + return count if count is not None else 0 + except sqlite3.Error as e: + logger.error(f"Error counting total available clusters: {e}", exc_info=True) + raise e + + def count_user_claimed_clusters(self, user_id: str) -> int: + try: + with self._get_db_connection() as conn: + cursor = conn.execute( + "SELECT COUNT(*) FROM clusters WHERE claimed_by_user_id = ? AND (status = ? OR status = ?)", + (user_id, CLUSTER_STATUS.STATUS_CLAIMED, CLUSTER_STATUS.STATUS_USER_PROVISIONING), + ) + count = cursor.fetchone()[0] + return count if count is not None else 0 + except sqlite3.Error as e: + logger.error(f"Error counting clusters for user {user_id}: {e}", exc_info=True) + raise e + + def get_all_clusters(self) -> List[Dict[str, Any]]: + try: + with self._get_db_connection() as conn: + cursor = conn.execute("SELECT * FROM clusters ORDER BY created_at DESC") + rows = cursor.fetchall() + return [dict(row) for row in rows] + except sqlite3.Error as e: + logger.error(f"Error getting all clusters: {e}", exc_info=True) + raise e diff --git a/provisioner/utils/__init__.py b/provisioner/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/provisioner/utils/email_sender.py b/provisioner/utils/email_sender.py new file mode 100644 index 0000000..0a8d017 --- /dev/null +++ b/provisioner/utils/email_sender.py @@ -0,0 +1,160 @@ +import os +import re +import smtplib +from email.mime.multipart import MIMEMultipart +from email.mime.text import MIMEText +from typing import List, Optional + +from dotenv import load_dotenv + +load_dotenv(override=True) + + +class EmailSender: + # SMTP Configuration + SMTP_SERVER = os.getenv("SMTP_SERVER", "") + SMTP_PORT = int(os.getenv("SMTP_PORT", "")) + SMTP_USERNAME = os.getenv("SMTP_USERNAME", "") + SMTP_PASSWORD = os.getenv("SMTP_PASSWORD", "") # For Gmail, this should be an App Password + SMTP_USE_TLS = os.getenv("SMTP_USE_TLS", "true").lower() == "true" + + # Email Configuration + DEFAULT_FROM_EMAIL = os.getenv("DEFAULT_FROM_EMAIL", SMTP_USERNAME) + DEFAULT_REPLY_TO = os.getenv("DEFAULT_REPLY_TO", "") + EMAIL_TIMEOUT = int(os.getenv("EMAIL_TIMEOUT", "30")) # seconds + + @staticmethod + def is_gmail_app_password(password: str) -> bool: + return bool(re.match(r"^[a-zA-Z0-9]{16}$", password)) + + def __init__( + self, + smtp_server: str = None, + smtp_port: int = None, + username: str = None, + password: str = None, + use_tls: bool = None, + ): + self.smtp_server = smtp_server or self.SMTP_SERVER + self.smtp_port = smtp_port or self.SMTP_PORT + self.username = username or self.SMTP_USERNAME + self.password = password or self.SMTP_PASSWORD + self.use_tls = use_tls if use_tls is not None else self.SMTP_USE_TLS + + # Validate Gmail configuration + if "gmail.com" in self.smtp_server.lower(): + if not self.is_gmail_app_password(self.password): + raise ValueError("For Gmail, you must use an App Password.") + + def is_email_set(self) -> bool: + return ( + self.username is not None + and self.password is not None + and self.smtp_server is not None + and self.smtp_port is not None + ) + + def send_email( + self, + to_addresses: List[str], + subject: str, + body: str, + cc_addresses: Optional[List[str]] = None, + bcc_addresses: Optional[List[str]] = None, + is_html: bool = False, + ) -> bool: + try: + # Create message + msg = MIMEMultipart() + msg["From"] = self.username + msg["To"] = ", ".join(to_addresses) + msg["Subject"] = subject + + if cc_addresses: + msg["Cc"] = ", ".join(cc_addresses) + + # Attach body + 
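Two fragilities in the SMTP configuration above: `int(os.getenv("SMTP_PORT", ""))` raises `ValueError` at import time whenever the variable is unset, and `is_email_set` can never return `False` because `os.getenv(..., "")` yields empty strings rather than `None`. A more tolerant parsing sketch (the 587 default is an assumption, not something the patch specifies):

```python
import os

def env_int(name, default):
    # Fall back to `default` when the variable is unset or blank instead of raising.
    raw = os.getenv(name, "").strip()
    return int(raw) if raw else default

SMTP_PORT = env_int("SMTP_PORT", 587)  # assumed default; adjust to the actual SMTP setup
```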
content_type = "html" if is_html else "plain" + msg.attach(MIMEText(body, content_type)) + + # Combine all recipients + all_recipients = to_addresses.copy() + if cc_addresses: + all_recipients.extend(cc_addresses) + if bcc_addresses: + all_recipients.extend(bcc_addresses) + + # Connect to SMTP server and send email + with smtplib.SMTP(self.smtp_server, self.smtp_port) as server: + if self.use_tls: + server.starttls() + server.login(self.username, self.password) + server.send_message(msg, self.username, all_recipients) + + return True + + except Exception as e: + return False + + def send_html_email( + self, + to_addresses: List[str], + subject: str, + html_body: str, + cc_addresses: Optional[List[str]] = None, + bcc_addresses: Optional[List[str]] = None, + ) -> bool: + return self.send_email( + to_addresses=to_addresses, + subject=subject, + body=html_body, + cc_addresses=cc_addresses, + bcc_addresses=bcc_addresses, + is_html=True, + ) + + def send_inactive_cluster_deletion_notice( + self, + to_addresses: List[str], + cluster_name: str, + last_activity: str, + ) -> bool: + """ + Send notification about an inactive cluster scheduled for deletion. + """ + subject = f"Cluster '{cluster_name}' Scheduled for Deletion" + + html_body = f""" + + +

    Your cluster {cluster_name} has exceeded the inactivity timeout and is being deleted.

    Last activity: {last_activity}
    + + + """ + + return self.send_html_email(to_addresses=to_addresses, subject=subject, html_body=html_body) + + def send_cluster_extension_failure_notice( + self, + to_addresses: List[str], + cluster_name: str, + error_message: str, + current_expiry: str, + ) -> bool: + """ + Send notification about a failed cluster extension attempt. + """ + subject = f"Failed to Extend Cluster '{cluster_name}'" + + html_body = f""" + + +

    Failed to extend cluster {cluster_name}.

    Error: {error_message}

    Current expiry: {current_expiry}
    + + + """ + + return self.send_html_email(to_addresses=to_addresses, subject=subject, html_body=html_body) diff --git a/provisioner/utils/logger.py b/provisioner/utils/logger.py new file mode 100644 index 0000000..5b9e39e --- /dev/null +++ b/provisioner/utils/logger.py @@ -0,0 +1,31 @@ +import logging +import os +from datetime import datetime +from logging.handlers import TimedRotatingFileHandler + +from provisioner.config.settings import DefaultSettings + +# Configure the root logger +logger = logging.getLogger() +logger.setLevel(logging.INFO) + +# Avoid adding handlers if already configured +if not logger.handlers: + # Console handler + console_handler = logging.StreamHandler() + console_handler.setLevel(logging.INFO) + + # Formatter + formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") + console_handler.setFormatter(formatter) + logger.addHandler(console_handler) + + # File handler with date-based rotation + log_dir = DefaultSettings.LOG_PATH + os.makedirs(log_dir, exist_ok=True) + + log_file = os.path.join(log_dir, f"provisioner_{datetime.now().strftime('%Y-%m-%d')}.log") + file_handler = TimedRotatingFileHandler(log_file, when="midnight", interval=1, backupCount=7, encoding="utf-8") + file_handler.setLevel(logging.INFO) + file_handler.setFormatter(formatter) + logger.addHandler(file_handler) diff --git a/provisioner/utils/parser.py b/provisioner/utils/parser.py new file mode 100644 index 0000000..3e162f8 --- /dev/null +++ b/provisioner/utils/parser.py @@ -0,0 +1,113 @@ +import html +import json +import xml.etree.ElementTree as ET + +import requests +from bs4 import BeautifulSoup + + +def parse_sliver_info(xml_text): + root = ET.fromstring(xml_text) + + # Get experiment description + rspec_tour = root.find(".//{http://www.protogeni.net/resources/rspec/ext/apt-tour/1}description") + description = rspec_tour.text if rspec_tour is not None else "No description" + + # Get expiration + expiration = root.get("expires", "No expiration date") + + # Parse node information + nodes = [] + for node in root.findall(".//{http://www.geni.net/resources/rspec/3}node"): + node_info = { + "client_id": node.get("client_id"), + "component_id": node.get("component_id"), + "hardware": node.find(".//{http://www.protogeni.net/resources/rspec/ext/emulab/1}vnode").get( + "hardware_type" + ), + "os_image": node.find(".//{http://www.protogeni.net/resources/rspec/ext/emulab/1}vnode").get("disk_image"), + } + + # Get host information + host = node.find(".//{http://www.geni.net/resources/rspec/3}host") + if host is not None: + node_info["hostname"] = host.get("name") + node_info["public_ip"] = host.get("ipv4") + + # Get interface information + interface = node.find(".//{http://www.geni.net/resources/rspec/3}interface") + if interface is not None: + ip = interface.find(".//{http://www.geni.net/resources/rspec/3}ip") + if ip is not None: + node_info["internal_ip"] = ip.get("address") + node_info["netmask"] = ip.get("netmask") + + nodes.append(node_info) + + # Get location information + location = root.find(".//{http://www.protogeni.net/resources/rspec/ext/site-info/1}location") + location_info = { + "country": location.get("country") if location is not None else None, + "latitude": location.get("latitude") if location is not None else None, + "longitude": location.get("longitude") if location is not None else None, + } + + return { + "description": description, + "expiration": expiration, + "nodes": nodes, + "location": location_info, + } + + +def collect_and_parse_hardware_info(): 
+ portal_hardware_url = "https://www.cloudlab.us/portal-hardware.php" + + try: + response = requests.get(portal_hardware_url) + response.raise_for_status() + html_content = response.text + soup = BeautifulSoup(html_content, "html.parser") + amlist_script_tag = soup.find("script", {"id": "amlist-json", "type": "text/plain"}) + + if not amlist_script_tag: + return None + + escaped_json_string = amlist_script_tag.string + if not escaped_json_string: + return None + + unescaped_json_string = html.unescape(escaped_json_string) + amlist_data = json.loads(unescaped_json_string) + + extracted_hardware_list = [] + for urn_key, urn_info in amlist_data.items(): + if isinstance(urn_info, dict): + cluster_name = urn_info.get("name", "N/A") + typeinfo = urn_info.get("typeinfo") + + if cluster_name not in [ + "Cloudlab Utah", + "Cloudlab Wisconsin", + "Cloudlab Clemson", + ]: + continue + + if isinstance(typeinfo, dict): + for hw_name, hw_stats in typeinfo.items(): + if isinstance(hw_stats, dict): + total_count = hw_stats.get("count", 0) + free_count = hw_stats.get("free", 0) + + extracted_hardware_list.append( + { + "hardware_name": hw_name, + "cluster_name": cluster_name, + "urn": urn_key, + "total": total_count, + "free": free_count, + } + ) + return extracted_hardware_list + except Exception as e: + return None diff --git a/provisioner/utils/ssh.py b/provisioner/utils/ssh.py new file mode 100644 index 0000000..5c3a1ee --- /dev/null +++ b/provisioner/utils/ssh.py @@ -0,0 +1,181 @@ +import os +import time +from typing import Optional, Tuple + +import paramiko + +from provisioner.config.settings import DefaultSettings +from provisioner.utils.logger import logger + + +class SSHUtilError(Exception): + """Custom exception for SSH utility errors.""" + + pass + + +class SSHManager: + def __init__( + self, + hostname: str, + username: str, + private_key_path: Optional[str] = None, + port: int = 22, + timeout: int = DefaultSettings.DEFAULT_SSH_TIME_OUT_SECONDS, + max_retries: int = 10, + retry_delay: int = 2, # seconds + ): + self.hostname = hostname + self.username = username + self.private_key_path = private_key_path + self.port = port + self.timeout = timeout + self.max_retries = max_retries + self.retry_delay = retry_delay + + def _create_ssh_client(self) -> paramiko.SSHClient: + client = paramiko.SSHClient() + client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + + for attempt in range(1, self.max_retries + 1): + try: + if self.private_key_path: + self.private_key_path = os.path.expanduser(self.private_key_path) + if not os.path.exists(self.private_key_path): + raise SSHUtilError(f"Private key file not found: {self.private_key_path}") + try: + # Try ED25519 first + key = paramiko.Ed25519Key.from_private_key_file(self.private_key_path) + except paramiko.SSHException: + try: + # Fall back to RSA + key = paramiko.RSAKey.from_private_key_file(self.private_key_path) + except paramiko.SSHException as e: + raise SSHUtilError(f"Failed to load private key: {e}") + logger.debug( + f"Attempting SSH connection to {self.username}@{self.hostname}:{self.port} using private key {self.private_key_path} (attempt {attempt}/{self.max_retries})" + ) + client.connect( + self.hostname, port=self.port, username=self.username, pkey=key, timeout=self.timeout + ) + else: + raise SSHUtilError("SSH connection requires either a private key.") + logger.info(f"Successfully connected to {self.username}@{self.hostname}:{self.port}") + return client + except (paramiko.AuthenticationException, paramiko.SSHException, Exception) as 
e: + if attempt < self.max_retries: + logger.warning( + f"SSH connection attempt {attempt} failed: {e}. Retrying in {self.retry_delay} seconds..." + ) + time.sleep(self.retry_delay) + else: + msg = f"SSH connection failed after {self.max_retries} attempts for {self.username}@{self.hostname}: {e}" + logger.error(msg, exc_info=True) + if isinstance(e, paramiko.AuthenticationException): + raise SSHUtilError(msg) from e + elif isinstance(e, paramiko.SSHException): + raise SSHUtilError(msg) from e + else: + raise SSHUtilError(msg) from e + + def execute_ssh_command( + self, + command: str, + ) -> Tuple[str, str, int]: + client = None + try: + client = self._create_ssh_client() + logger.info(f"Executing command on {self.hostname}: {command}") + stdin, stdout, stderr = client.exec_command(command, timeout=self.timeout) + + # It's important to read stdout and stderr before checking exit_status + stdout_output = stdout.read().decode("utf-8", errors="replace").strip() + stderr_output = stderr.read().decode("utf-8", errors="replace").strip() + + # exit_status_ready() can be used to check if status is available without blocking + # recv_exit_status() will block until the command finishes. + exit_code = stdout.channel.recv_exit_status() + + if stdout_output: + logger.debug(f"Command stdout on {self.hostname}: {stdout_output}") + if stderr_output: + logger.warning(f"Command stderr on {self.hostname}: {stderr_output}") + logger.info(f"Command on {self.hostname} finished with exit code: {exit_code}") + + return stdout_output, stderr_output, exit_code + except SSHUtilError: + logger.error(f"Error creating SSH client for {self.hostname}:{self.username}:{self.port}") + raise + except paramiko.SSHException as e: + msg = f"Error executing command '{command}' on {self.hostname}: {e}" + logger.error(msg) + raise SSHUtilError(msg) from e + except Exception as e: + msg = f"An unexpected error occurred while executing command on {self.hostname}: {e}" + logger.error(msg, exc_info=True) + raise SSHUtilError(msg) from e + finally: + if client: + client.close() + logger.debug(f"SSH connection to {self.hostname} closed.") + + def upload_file_scp( + self, + local_path: str, + remote_path: str, + ): + local_path = os.path.expanduser(local_path) + if not os.path.exists(local_path): + raise FileNotFoundError(f"Local file not found: {local_path}") + + client = None + sftp = None + try: + client = self._create_ssh_client() + sftp = client.open_sftp() + logger.info(f"Uploading {local_path} to {self.username}@{self.hostname}:{remote_path}") + sftp.put(local_path, remote_path) + logger.info(f"Successfully uploaded {local_path} to {remote_path} on {self.hostname}") + except SSHUtilError: + msg = f"Error creating SSH client for {self.hostname}:{self.username}:{self.port}" + logger.error(msg) + raise SSHUtilError(msg) + except Exception as e: + msg = f"Error uploading file {local_path} to {self.hostname}:{remote_path}: {e}" + logger.error(msg, exc_info=True) + raise SSHUtilError(msg) from e + finally: + if sftp: + sftp.close() + if client: + client.close() + logger.debug(f"SSH connection to {self.hostname} closed (after upload).") + + def download_file_scp( + self, + remote_path: str, + local_path: str, + ): + local_path = os.path.expanduser(local_path) + client = None + sftp = None + try: + client = self._create_ssh_client() + sftp = client.open_sftp() + logger.info(f"Downloading {self.username}@{self.hostname}:{remote_path} to {local_path}") + sftp.get(remote_path, local_path) + logger.info(f"Successfully downloaded {remote_path} 
from {self.hostname} to {local_path}") + except SSHUtilError: + msg = f"Error creating SSH client for {self.hostname}:{self.username}:{self.port}" + logger.error(msg) + raise SSHUtilError(msg) + except Exception as e: + msg = f"Error downloading file {remote_path} from {self.hostname} to {local_path}: {e}" + logger.error(msg, exc_info=True) + raise SSHUtilError(msg) from e + finally: + if sftp: + sftp.close() + if client: + client.close() + logger.debug(f"SSH connection to {self.hostname} closed (after download).") diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..4c3f563 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,184 @@ +[project] +name = "sregym" +requires-python = ">= 3.12" +version = "0.0.1" + +dependencies = [ + "ansible>=11.8.0", + "anthropic>=0.52.0", + "anyio==4.9.0", + "apscheduler>=3.11.0", + "asgiref==3.8.1", + "attrs==25.3.0", + "autogen-agentchat==0.2.40", + "azure-ai-ml==1.27.1", + "azure-common==1.1.28", + "azure-core==1.34.0", + "azure-core-tracing-opentelemetry==1.0.0b12", + "azure-identity==1.23.0", + "azure-mgmt-core==1.5.0", + "azure-monitor-opentelemetry==1.6.9", + "azure-monitor-opentelemetry-exporter==1.0.0b36", + "azure-storage-blob==12.25.1", + "azure-storage-file-datalake==12.20.0", + "azure-storage-file-share==12.21.0", + "bashlex>=0.18", + "bcrypt==4.3.0", + "black==24.10.0", + "blake3==1.0.5", + "boto3>=1.28.57", + "bs4>=0.0.2", + "cachetools==5.5.2", + "certifi==2025.4.26", + "cffi==1.17.1", + "charset-normalizer==3.4.2", + "click==8.2.1", + "cloudpickle==3.1.1", + "colorama==0.4.6", + "cryptography==45.0.3", + "dateparser==1.2.1", + "deprecated==1.2.18", + "dill==0.4.0", + "distro==1.9.0", + "dnspython==2.7.0", + "docker==7.1.0", + "elastic-transport==8.17.1", + "elasticsearch==8.18.1", + "email-validator==2.2.0", + "fastapi[standard]==0.115.12", + "fastapi-cli==0.0.7", + "fastmcp>=2.9.2", + "filelock==3.18.0", + "flaml==2.3.4", + "fsspec==2025.5.1", + "geni-lib-xlab", + "gguf==0.10.0", + "gitpython==3.1.44", + "google-auth==2.40.2", + "h11==0.16.0", + "httpcore==1.0.9", + "httptools==0.6.4", + "httpx==0.28.1", + "idna==3.10", + "importlib-metadata==8.6.1", + "itsdangerous>=2.2.0", + "jinja2==3.1.6", + "jsonschema==4.23.0", + "kubernetes==30.1.0", + "langchain>=0.3.25", + "langchain-litellm>=0.2.2", + "langchain-openai>=0.3.18", + "langgraph>=0.4.7", + "langsmith>=0.3.43", + "litellm>=1.75.0", + "markdown-it-py==3.0.0", + "markupsafe==3.0.2", + "mcp[cli]>=1.9.1", + "msal==1.32.3", + "msal-extensions==1.3.1", + "multidict==6.4.4", + "mypy-extensions==1.1.0", + "nest-asyncio==1.6.0", + "networkx==3.4.2", + "nodeenv==1.9.1", + "numpy==1.26.4", + "oauthlib==3.2.2", + "opentelemetry-api==1.31.1", + "opentelemetry-instrumentation==0.52b1", + "opentelemetry-instrumentation-asgi==0.52b1", + "opentelemetry-instrumentation-dbapi==0.52b1", + "opentelemetry-instrumentation-django==0.52b1", + "opentelemetry-instrumentation-fastapi==0.52b1", + "opentelemetry-instrumentation-flask==0.52b1", + "opentelemetry-instrumentation-psycopg2==0.52b1", + "opentelemetry-instrumentation-requests==0.52b1", + "opentelemetry-instrumentation-urllib==0.52b1", + "opentelemetry-instrumentation-urllib3==0.52b1", + "opentelemetry-instrumentation-wsgi==0.52b1", + "opentelemetry-resource-detector-azure==0.1.5", + "opentelemetry-sdk==1.31.1", + "opentelemetry-semantic-conventions==0.52b1", + "opentelemetry-util-http==0.52b1", + "packaging>=24.0", + "pandas==2.2.3", + "paramiko==3.5.1", + "pathspec==0.12.1", + "platformdirs==4.3.8", + "pre-commit>=4.2.0", + 
"prometheus-client==0.22.0", + "prometheus-fastapi-instrumentator==7.1.0", + "prompt-toolkit==3.0.51", + "protobuf==6.31.0", + "psutil==6.1.1", + "pydantic==2.11.5", + "pydantic-core==2.33.2", + "pydash==8.0.5", + "pygments==2.19.1", + "pyjwt==2.10.1", + "pynacl==1.5.0", + "pyparsing==3.2.3", + "pyright==1.1.401", + "pytest>=8.3.5", + "python-dateutil==2.9.0.post0", + "python-dotenv==1.1.0", + "python-multipart==0.0.20", + "pytz==2025.2", + "pyyaml==6.0.2", + "pyzmq==26.4.0", + "referencing==0.36.2", + "regex==2024.11.6", + "requests>=2.32.3", + "requests-oauthlib==2.0.0", + "rich==13.9.4", + "rich-toolkit==0.14.6", + "rsa==4.9.1", + "sentry-sdk==2.29.1", + "setproctitle==1.3.6", + "shellingham==1.5.4", + "sniffio==1.3.1", + "starlette==0.46.2", + "termcolor==3.1.0", + "tiktoken==0.7.0", + "tqdm==4.67.1", + "typer==0.16.0", + "typing-extensions==4.13.2", + "typing-inspection==0.4.1", + "tzdata==2025.2", + "tzlocal==5.3.1", + "urllib3>=2.4.0", + "uvicorn==0.34.2", + "uvloop==0.21.0", + "watchfiles==1.0.5", + "websocket-client==1.8.0", + "websockets==15.0.1", + "yarl==1.20.0", + "pyfiglet>=1.0.3", + "langchain-ibm>=0.3.15", + "langchain-google-genai>=2.1.12", + "dash>=3.2.0", + "aiosignal>=1.3.2", + "locust>=2.42.3", +] + +[tool.black] +line-length = 120 +target-version = ["py312"] + +[tool.isort] +profile = "black" +line_length = 120 + +[build-system] +requires = ["setuptools>=80.8.0"] +build-backend = "setuptools.build_meta" + +[dependency-groups] +dev = [ + "deptry>=0.24.0", +] + +[tool.setuptools] +packages = ["sregym", "clients", "provisioner", "scripts"] + +[tool.uv.sources] +geni-lib-xlab = { path = "scripts/geni_lib/mod/geni_lib_xlab-1.0.0.tar.gz" } diff --git a/scripts/__init__.py b/scripts/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scripts/ansible/README.md b/scripts/ansible/README.md new file mode 100644 index 0000000..e7c1ce3 --- /dev/null +++ b/scripts/ansible/README.md @@ -0,0 +1,31 @@ +## Environment Setup + +This is the instruction to use Ansible to build a remote cluster for SREGym. We currently use [CloudLab](https://www.cloudlab.us/) but we believe this will work on any servers you have access to. + + +### 1) Modify the inventory file +```bash +cp inventory.yml.example inventory.yml +``` + +Modify the IPs and user names in the inventory file accordingly, `inventory.yml`. + +### 2) Run the Ansible playbook +```shell +ansible-playbook -i inventory.yml setup_cluster.yml +``` + +After these, you should see every node running inside the cluster: +```shell +kubectl get nodes +``` + +### Common Errors +If you're running into issues from Ansible related to host key authentication, try typing `yes` in your terminal for each node, or proceeding with the following steps: + +You can create a file in the same directory as this README called `ansible.cfg` to turn off that warning: +```yaml +[defaults] +host_key_checking = False +``` +Be mindful about the security implications of disabling host key checking, if you're not aware ask someone who is. 
diff --git a/scripts/ansible/ansible.cfg b/scripts/ansible/ansible.cfg new file mode 100644 index 0000000..86cfd2f --- /dev/null +++ b/scripts/ansible/ansible.cfg @@ -0,0 +1,2 @@ +[defaults] +host_key_checking = False \ No newline at end of file diff --git a/scripts/ansible/inventory.yml.example b/scripts/ansible/inventory.yml.example new file mode 100644 index 0000000..e21e24c --- /dev/null +++ b/scripts/ansible/inventory.yml.example @@ -0,0 +1,19 @@ + +all: + vars: + k8s_user: + k8s_user2: + children: + control_nodes: + hosts: + control_node: + ansible_host: + ansible_user: "{{ k8s_user }}" + worker_nodes: + hosts: + worker_node_1: + ansible_host: + ansible_user: "{{ k8s_user2 }}" + worker_node_2: + ansible_host: + ansible_user: "{{ k8s_user2 }}" \ No newline at end of file diff --git a/scripts/ansible/setup_cluster.yml b/scripts/ansible/setup_cluster.yml new file mode 100644 index 0000000..45b4852 --- /dev/null +++ b/scripts/ansible/setup_cluster.yml @@ -0,0 +1,815 @@ +--- +- hosts: all + gather_facts: yes + become: true + vars: + docker_key_url: https://download.docker.com/linux/ubuntu/gpg + docker_repo: "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" + # kube_version: "1.31.0" # Adjust Kubernetes version as needed + cri_dockerd_version: "0.3.12.3-0" # Example version for cri-dockerd, customize as needed + + tasks: + - name: Kill any stuck apt processes + shell: | + pkill -9 apt-get || true + sleep 2 + become: yes + ignore_errors: yes + changed_when: false + + - name: Wait for apt lock to be released + shell: | + for i in {1..30}; do + if ! lsof /var/lib/apt/lists/lock 2>/dev/null && ! lsof /var/cache/apt/archives/lock 2>/dev/null; then + echo "Lock released" + break + fi + sleep 1 + done + become: yes + changed_when: false + + - name: Fix interrupted dpkg state + shell: dpkg --configure -a + become: yes + changed_when: false + ignore_errors: yes + + # CRITICAL: Install Ubuntu generic 6.8 kernel and headers (must match for dm_dust) + - name: Install Ubuntu generic 6.8 kernel and matching headers + apt: + name: + - linux-image-generic-6.8 + - linux-headers-generic-6.8 + state: present + update_cache: yes + become: yes + timeout: 900 + + - name: Get current running kernel version + shell: uname -r + register: current_kernel_check + changed_when: false + + - name: Get installed Ubuntu generic 6.8 kernel version + shell: dpkg -l | grep 'linux-image-6.8.0-.*-generic' | grep '^ii' | awk '{print $2}' | sed 's/linux-image-//' | head -1 + register: target_kernel_check + changed_when: false + + - name: Set kernel version facts + set_fact: + current_kernel_version: "{{ current_kernel_check.stdout }}" + target_kernel_version: "{{ target_kernel_check.stdout }}" + need_kernel_switch: "{{ current_kernel_check.stdout != target_kernel_check.stdout }}" + + - name: Debug kernel versions + debug: + msg: "Current kernel: {{ current_kernel_version }}, Target kernel: {{ target_kernel_version }}, Switch needed: {{ need_kernel_switch }}" + + - name: Set Ubuntu generic kernel as default in GRUB + shell: | + KERNEL_VERSION="{{ target_kernel_version }}" + MENU_ENTRY="Advanced options for Ubuntu>Ubuntu, with Linux $KERNEL_VERSION" + sed -i "s/^GRUB_DEFAULT=.*/GRUB_DEFAULT=\"$MENU_ENTRY\"/" /etc/default/grub + update-grub + become: yes + when: need_kernel_switch + register: grub_updated + + - name: Reboot to switch to matching Ubuntu generic kernel + reboot: + reboot_timeout: 600 + pre_reboot_delay: 10 + 
post_reboot_delay: 60 + become: yes + when: need_kernel_switch + + - name: Wait for system to come back after kernel switch + wait_for_connection: + delay: 10 + timeout: 600 + when: need_kernel_switch + + - name: Get current kernel version + shell: uname -r + register: kernel_after_fix + changed_when: false + + - name: Fix apt lock file permissions and clean up + shell: | + chmod 644 /var/lib/apt/lists/lock 2>/dev/null || true + chown root:root /var/lib/apt/lists/lock 2>/dev/null || true + rm -f /var/cache/apt/archives/lock 2>/dev/null || true + # Remove lock files that may cause issues + rm -f /var/lib/dpkg/lock-frontend 2>/dev/null || true + rm -f /var/lib/dpkg/lock 2>/dev/null || true + become: yes + changed_when: false + ignore_errors: yes + + - name: Remove old Kubernetes repository e.g., v1.29 if it exists + file: + path: /etc/apt/sources.list.d/kubernetes.list + state: absent + + - name: Refresh and upgrade packages + apt: + update_cache: "yes" + upgrade: "yes" + + # - name: Install python3-apt + # apt: + # name: python3-apt + # state: present + # become: true + + - name: Remove old Docker versions if any + apt: + name: "{{ item }}" + state: absent + loop: + - docker.io + - docker-doc + - docker-compose + - podman-docker + - containerd + - runc + + - name: Install prerequisites for Docker and Kubernetes (batched) + apt: + name: + - ca-certificates + - curl + - gnupg + - lsb-release + - socat + - conntrack + - libssl-dev + state: present + update_cache: no + become: yes + + - name: Add Docker GPG key and repository + shell: | + install -m 0755 -d /etc/apt/keyrings + curl -fsSL {{ docker_key_url }} -o /etc/apt/keyrings/docker.asc + echo "{{ docker_repo }}" | tee /etc/apt/sources.list.d/docker.list + args: + creates: /etc/apt/sources.list.d/docker.list + become: yes + + - name: Update apt and install Docker + apt: + update_cache: yes + name: + - docker-ce + - docker-ce-cli + - containerd.io + - docker-buildx-plugin + - docker-compose-plugin + state: present + + - name: Add user to docker group to run Docker without sudo + user: + name: "{{ ansible_user_id }}" + groups: docker + append: yes + + - name: Get system architecture + command: dpkg --print-architecture + register: system_arch + changed_when: false + + - name: Download cri-dockerd .deb package (amd64) + get_url: + url: "https://github.com/Mirantis/cri-dockerd/releases/download/v0.3.20/cri-dockerd_0.3.20.3-0.ubuntu-jammy_amd64.deb" + dest: /tmp/install-cri-dockerd.deb + when: system_arch.stdout == "amd64" + + - name: Install CRI-Dockerd from .deb package (amd64) + apt: + deb: /tmp/install-cri-dockerd.deb + state: present + when: system_arch.stdout == "amd64" + + - name: Download cri-dockerd .tgz archive (arm64) + get_url: + url: "https://github.com/Mirantis/cri-dockerd/releases/download/v0.3.20/cri-dockerd-0.3.20.arm64.tgz" + dest: /tmp/cri-dockerd.tgz + when: system_arch.stdout == "arm64" + + - name: Extract cri-dockerd archive (arm64) + unarchive: + src: /tmp/cri-dockerd.tgz + dest: /tmp/ + remote_src: yes + when: system_arch.stdout == "arm64" + + - name: Install cri-dockerd binaries (arm64) + copy: + src: "/tmp/cri-dockerd/{{ item }}" + dest: "/usr/local/bin/{{ item }}" + mode: '0755' + remote_src: yes + loop: + - cri-dockerd + when: system_arch.stdout == "arm64" + + - name: Download cri-dockerd systemd service files (arm64) + get_url: + url: "https://raw.githubusercontent.com/Mirantis/cri-dockerd/v0.3.20/packaging/systemd/{{ item }}" + dest: "/etc/systemd/system/{{ item }}" + loop: + - cri-docker.service + - 
cri-docker.socket + when: system_arch.stdout == "arm64" + + - name: Fix cri-dockerd binary path in service file (arm64) + replace: + path: /etc/systemd/system/cri-docker.service + regexp: '/usr/bin/cri-dockerd' + replace: '/usr/local/bin/cri-dockerd' + when: system_arch.stdout == "arm64" + + - name: Reload systemd daemon (arm64) + systemd: + daemon_reload: yes + when: system_arch.stdout == "arm64" + + - name: Start and enable cri-dockerd service + systemd: + name: cri-docker + enabled: yes + state: started + + - name: Check if cri-dockerd socket exists + stat: + path: /var/run/cri-dockerd.sock + register: cri_socket + + - name: Install QEMU user-static for x86_64 emulation on ARM nodes + apt: + name: qemu-user-static + state: present + when: system_arch.stdout == "arm64" + register: qemu_install + + - name: Verify QEMU registration + shell: ls /proc/sys/fs/binfmt_misc/ | grep qemu-x86_64 + register: qemu_check + failed_when: false + changed_when: false + when: system_arch.stdout == "arm64" + + - name: Display QEMU status + debug: + msg: "QEMU x86_64 emulation is {{ 'enabled' if qemu_check.rc == 0 else 'not found' }}" + when: system_arch.stdout == "arm64" + + # - name: Wait for cri-dockerd socket to be available + # wait_for: + # path: /var/run/cri-dockerd.sock + # state: present + # timeout: 30 + # when: not cri_socket.stat.exists + + - name: Install Kubernetes packages + apt: + name: + - apt-transport-https + - curl + - gpg + state: present + + # - name: Add Kubernetes GPG key and repository + # shell: | + # echo "deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/{{ kube_version }}/deb/ /" | sudo tee /etc/apt/sources.list.d/kubernetes.list + # curl -fsSL https://pkgs.k8s.io/core:/stable:/{{ kube_version }}/deb/Release.key | sudo gpg --dearmor -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg + # args: + # creates: /etc/apt/sources.list.d/kubernetes.list + + - name: Remove existing Kubernetes GPG key if it exists + file: + path: /etc/apt/keyrings/kubernetes-apt-keyring.gpg + state: absent + + - name: Add Kubernetes GPG key + shell: | + curl -fsSL https://pkgs.k8s.io/core:/stable:/v1.31/deb/Release.key | gpg --dearmor -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg + args: + creates: /etc/apt/keyrings/kubernetes-apt-keyring.gpg + become: yes + + - name: Add Kubernetes repository for v1.31 + shell: | + echo 'deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v1.31/deb/ /' | tee /etc/apt/sources.list.d/kubernetes.list + args: + creates: /etc/apt/sources.list.d/kubernetes.list + become: yes + + - name: Install kubeadm, kubelet, kubectl + apt: + update_cache: yes + name: + - kubeadm + - kubelet + - kubectl + state: present + + # - name: Hold Kubernetes packages at installed version + # apt: + # name: + # - kubeadm + # - kubelet + # - kubectl + # state: present + # mark: hold + + - name: Hold Kubernetes packages at installed version + shell: | + apt-mark hold kubeadm kubelet kubectl + become: yes + + - name: Disable swap + shell: | + swapoff -a + sed -i '/ swap / s/^\(.*\)$/#\1/g' /etc/fstab + become: yes + ignore_errors: yes + changed_when: false + + - name: Install software-properties-common for PPA support + apt: + name: software-properties-common + state: present + update_cache: yes + become: yes + + - name: Verify kernel matches headers + assert: + that: + - kernel_after_fix.stdout is defined + - "'6.8.0-' in kernel_after_fix.stdout" + - "'-generic' in kernel_after_fix.stdout" + fail_msg: "Kernel doesn't 
match expected Ubuntu generic format. Current: {{ kernel_after_fix.stdout | default('undefined') }}" + success_msg: "Kernel matches Ubuntu generic 6.8: {{ kernel_after_fix.stdout }}" + + - name: Confirm kernel and headers match + debug: + msg: "Running kernel: {{ kernel_after_fix.stdout }} (should match headers)" + + - name: Remove broken kernel headers if they cause dependency issues + shell: | + # Remove any broken headers that prevent package installation + dpkg -l | grep "linux-headers.*6\.8\." | grep -v "^ii" | awk '{print $2}' | xargs -r apt-get remove --purge -y || true + # Also remove headers with unmet dependencies + if apt-get check 2>&1 | grep -q "linux-headers.*6\.8"; then + dpkg -l | grep "linux-headers.*6\.8\." | awk '{print $2}' | xargs -r apt-get remove --purge -y || true + fi + apt-get install -f -y || true + ignore_errors: yes + become: yes + + - name: Install additional packages for module building + apt: + name: + - flex + - bison + - bc + - libelf-dev + - build-essential + - gcc-12 + state: present + update_cache: no + become: yes + + - name: Get full kernel version + shell: uname -r + register: kernel_version_full + changed_when: false + + - name: Check if dm_dust and dm_flakey modules exist + shell: | + DUST=$(modinfo dm_dust 2>/dev/null && echo "exists" || echo "not_found") + FLAKEY=$(modinfo dm_flakey 2>/dev/null && echo "exists" || echo "not_found") + echo "dm_dust:$DUST" + echo "dm_flakey:$FLAKEY" + register: dm_modules_check + changed_when: false + + - name: Set module facts (reusable throughout playbook) + set_fact: + dm_dust_exists: "{{ 'dm_dust:exists' in dm_modules_check.stdout }}" + dm_flakey_exists: "{{ 'dm_flakey:exists' in dm_modules_check.stdout }}" + + - name: Check if Ubuntu kernel headers are available + stat: + path: "/usr/src/linux-headers-{{ kernel_version_full.stdout }}" + register: ubuntu_headers + changed_when: false + + - name: Install generic 6.8 kernel headers (should already match running kernel) + apt: + name: linux-headers-generic-6.8 + state: present + update_cache: no + when: > + (not dm_dust_exists or not dm_flakey_exists) + and not ubuntu_headers.stat.exists + become: yes + ignore_errors: yes + + - name: Re-check headers after installation attempt + stat: + path: "/usr/src/linux-headers-{{ kernel_version_full.stdout }}" + register: ubuntu_headers + changed_when: false + + - name: Fail if headers not available + fail: + msg: "Kernel headers not available for {{ kernel_version_full.stdout }}. Cannot build modules." 
+ when: > + (not dm_dust_exists or not dm_flakey_exists) + and not ubuntu_headers.stat.exists + + - name: Check if build symlink exists + stat: + path: "/lib/modules/{{ kernel_version_full.stdout }}/build" + register: build_symlink + changed_when: false + when: > + (not dm_dust_exists or not dm_flakey_exists) + and ubuntu_headers.stat.exists + + - name: Create build symlink if headers exist but symlink is missing + file: + src: "/usr/src/linux-headers-{{ kernel_version_full.stdout }}" + dest: "/lib/modules/{{ kernel_version_full.stdout }}/build" + state: link + become: yes + when: > + (not dm_dust_exists or not dm_flakey_exists) + and ubuntu_headers.stat.exists + and (build_symlink is defined and (not build_symlink.stat.exists or not build_symlink.stat.islnk)) + + - name: Ensure kernel module directory exists + file: + path: "/lib/modules/{{ kernel_version_full.stdout }}/kernel/drivers/md" + state: directory + mode: '0755' + become: yes + when: not dm_dust_exists or not dm_flakey_exists + + - name: Load dm_dust module if available + modprobe: + name: dm_dust + state: present + when: dm_dust_exists + ignore_errors: yes + become: yes + + - name: Build and install dm_dust module if not available + block: + - name: Create dm-dust build directory + file: + path: /tmp/dm-dust-build + state: directory + + # Download kernel source for dm-dust.c + - name: Extract kernel major.minor version + set_fact: + kernel_major_minor: "{{ kernel_version_full.stdout | regex_replace('^([0-9]+\\.[0-9]+).*', '\\1') }}" + + - name: Debug kernel version for download + debug: + msg: "Full kernel: {{ kernel_version_full.stdout }}, Major.Minor: {{ kernel_major_minor }}" + + - name: Check if kernel source already downloaded + stat: + path: /tmp/linux-{{ kernel_major_minor }}.tar.xz + register: kernel_source_exists + + - name: Download kernel source for dm-dust source code + get_url: + url: "https://cdn.kernel.org/pub/linux/kernel/v6.x/linux-{{ kernel_major_minor }}.tar.xz" + dest: /tmp/linux-{{ kernel_major_minor }}.tar.xz + when: not kernel_source_exists.stat.exists + register: kernel_source_download + + - name: Check if kernel source already extracted + stat: + path: /tmp/linux-{{ kernel_major_minor }}/drivers/md/dm-dust.c + register: dm_dust_source_exists + + - name: Extract only the dm-dust source file from kernel tarball + shell: | + cd /tmp + tar -xJf linux-{{ kernel_major_minor }}.tar.xz linux-{{ kernel_major_minor }}/drivers/md/dm-dust.c + when: not dm_dust_source_exists.stat.exists + args: + creates: /tmp/linux-{{ kernel_major_minor }}/drivers/md/dm-dust.c + + - name: Copy dm-dust source to build directory + shell: | + cp /tmp/linux-{{ kernel_major_minor }}/drivers/md/dm-dust.c /tmp/dm-dust-build/dm-dust.c + args: + creates: /tmp/dm-dust-build/dm-dust.c + + - name: Create Makefile for dm-dust (use system headers) + copy: + dest: /tmp/dm-dust-build/Makefile + content: | + obj-m := dm-dust.o + KDIR := /lib/modules/$(shell uname -r)/build + PWD := $(shell pwd) + CC := gcc-12 + + default: + $(MAKE) -C $(KDIR) M=$(PWD) modules CC=gcc-12 + + clean: + $(MAKE) -C $(KDIR) M=$(PWD) clean + + install: + mkdir -p /lib/modules/$(shell uname -r)/kernel/drivers/md/ + cp dm-dust.ko /lib/modules/$(shell uname -r)/kernel/drivers/md/ + depmod -a + mode: '0644' + + - name: Check if dm-dust module already built + stat: + path: /tmp/dm-dust-build/dm-dust.ko + register: dm_dust_ko_precheck + + - name: Build dm-dust module + make: + chdir: /tmp/dm-dust-build + environment: + KBUILD_MODPOST_WARN: "1" + CC: "gcc-12" + ignore_errors: yes 
+ register: dm_dust_build_result + when: not dm_dust_ko_precheck.stat.exists + + - name: Verify dm_dust module was built successfully + stat: + path: /tmp/dm-dust-build/dm-dust.ko + register: dm_dust_ko + failed_when: not dm_dust_ko.stat.exists + ignore_errors: yes + + - name: Install dm-dust module + make: + chdir: /tmp/dm-dust-build + target: install + become: yes + ignore_errors: yes + when: dm_dust_ko.stat.exists | default(false) + + - name: Verify dm_dust module is installed + stat: + path: "/lib/modules/{{ kernel_version_full.stdout }}/kernel/drivers/md/dm-dust.ko" + register: dm_dust_installed + failed_when: false + changed_when: false + + - name: Load dm_dust module + modprobe: + name: dm_dust + state: present + become: yes + ignore_errors: yes + register: dm_dust_load_result + + - name: Debug dm_dust module load result + debug: + msg: "dm_dust module load result: {{ dm_dust_load_result }}" + + - name: Verify dm_dust module can be loaded + shell: | + modprobe dm_dust + lsmod | grep -q "^dm_dust" || exit 1 + dmsetup targets | grep -q "dust" || exit 1 + echo "dm_dust module is loaded and functional" + become: yes + register: dm_dust_load_test + failed_when: dm_dust_load_test.rc != 0 + changed_when: false + ignore_errors: yes + + - name: Clean up build directory after successful install + file: + path: /tmp/dm-dust-build + state: absent + when: dm_dust_installed.stat.exists + when: not dm_dust_exists + + - name: Load dm_flakey module (built into kernel 6.8.x) + modprobe: + name: dm_flakey + state: present + ignore_errors: yes + become: yes + + - name: Load necessary kernel modules + shell: | + modprobe overlay + modprobe br_netfilter + modprobe dm_dust || true + modprobe dm_flakey || true + become: yes + args: + creates: /etc/modules-load.d/k8s.conf + + - name: Create modules-load configuration for Kubernetes and modules + copy: + content: | + overlay + br_netfilter + dm_dust + dm_flakey + dest: /etc/modules-load.d/k8s.conf + become: yes + + - name: Configure sysctl for Kubernetes networking + copy: + content: | + net.bridge.bridge-nf-call-iptables = 1 + net.bridge.bridge-nf-call-ip6tables = 1 + net.ipv4.ip_forward = 1 + dest: /etc/sysctl.d/k8s.conf + notify: Reload sysctl + + - name: Start and enable kubelet + systemd: + name: kubelet + enabled: yes + state: started + + handlers: + - name: Install CRI-Dockerd + apt: + deb: /tmp/cri-dockerd.deb + + - name: Reload sysctl + command: sysctl --system + become: yes + +# Control Node Setup +- hosts: control_node # Control plane tasks + become: true + vars: + kubeconfig_path: "/users/{{ k8s_user }}/.kube/config" + tasks: + - name: Resolve control node hostname to IP + command: getent ahosts "{{ hostvars['control_node'].ansible_host }}" + register: resolved_ip_output + - name: Parse resolved IP from output + set_fact: + resolved_control_plane_ip: "{{ resolved_ip_output.stdout_lines[0].split(' ')[0] }}" + - name: Set resolved_control_plane_ip globally + add_host: + name: "global" + resolved_control_plane_ip: "{{ resolved_control_plane_ip }}" + - name: Initialize Kubernetes control plane + shell: | + kubeadm init --pod-network-cidr=10.244.0.0/16 --cri-socket /var/run/cri-dockerd.sock --apiserver-advertise-address={{ resolved_control_plane_ip }} + args: + creates: /etc/kubernetes/admin.conf + - name: Ensure .kube directory exists + file: + path: "/users/{{ k8s_user }}/.kube" + state: directory + mode: '0755' + owner: "{{ k8s_user }}" + # group: "{{ k8s_user }}" + become: true + - name: Temporarily set permissions to read admin.conf + file: + 
path: /etc/kubernetes/admin.conf + mode: '0644' + become: true + + - name: Set up kube config for kubectl on control plane + copy: + src: /etc/kubernetes/admin.conf + dest: "/users/{{ k8s_user }}/.kube/config" + mode: '0644' + remote_src: true + become: true + become_method: sudo + - name: Ensure ownership of kube config for kubectl + file: + path: "/users/{{ k8s_user }}/.kube/config" + owner: "{{ k8s_user }}" + # group: "{{ k8s_user }}" + mode: '0644' + become: true + - name: Display ansible_user_id + debug: + msg: "ansible_user_id is {{ ansible_user_id }}" + + - name: Fetch admin.conf to localhost for kubeconfig + fetch: + src: /etc/kubernetes/admin.conf + dest: ~/.kube/config + flat: yes + become: true + - name: Generate kubeadm join command + shell: kubeadm token create --print-join-command + register: kubeadm_join_command + - name: Extract kube_token and cert_hash from join command + set_fact: + kube_token: "{{ (kubeadm_join_command.stdout | regex_search('--token\\s+([\\w.]+)', '\\1')).0 }}" + cert_hash: "{{ (kubeadm_join_command.stdout | regex_search('--discovery-token-ca-cert-hash\\s+sha256:([\\w]+)', '\\1')).0 }}" + + - name: Display kube_token + debug: + msg: "kube_token is {{ kube_token }}" + - name: Display cert_hash + debug: + msg: "cert_hash is {{ cert_hash }}" + - name: Install Flannel network plugin + shell: | + kubectl apply -f https://github.com/flannel-io/flannel/releases/latest/download/kube-flannel.yml + args: + creates: /etc/kubernetes/kube-flannel.yml + environment: + KUBECONFIG: "{{ kubeconfig_path }}" + - name: Untaint the control plane to host pods + shell: kubectl taint nodes $(hostname) node-role.kubernetes.io/control-plane:NoSchedule- || true + environment: + KUBECONFIG: "{{ kubeconfig_path }}" +# Worker Node Setup +- hosts: worker_nodes + become: true + tasks: + - name: Join Kubernetes cluster + shell: | + kubeadm join {{ hostvars['global'].resolved_control_plane_ip }}:6443 --token {{ hostvars['control_node'].kube_token }} --discovery-token-ca-cert-hash sha256:{{ hostvars['control_node'].cert_hash }} --cri-socket unix:///var/run/cri-dockerd.sock --v=5 + args: + creates: /var/lib/kubelet/kubeadm-flags.env + become: true + - name: Ensure .kube directory exists + file: + path: "/users/{{ ansible_user }}/.kube" + state: directory + mode: '0755' + become_user: "{{ ansible_user }}" # Ensure directory is created under the correct user + - name: Display ansible_user + debug: + msg: "ansible_user is {{ ansible_user }}" + +# Label Worker Nodes +- hosts: control_node + become: true + vars: + kubeconfig_path: "/users/{{ k8s_user }}/.kube/config" + tasks: + - name: Wait for worker nodes to be ready + shell: | + timeout=120 + elapsed=0 + while [ $elapsed -lt $timeout ]; do + if kubectl get nodes --no-headers | grep -v "control-plane\|master" | grep -q "Ready"; then + echo "Worker nodes are ready" + exit 0 + fi + sleep 2 + elapsed=$((elapsed + 2)) + done + echo "Timeout waiting for worker nodes after ${timeout}s" + exit 1 + environment: + KUBECONFIG: "{{ kubeconfig_path }}" + ignore_errors: yes + + - name: Get all node names + shell: kubectl get nodes --no-headers -o custom-columns=NAME:.metadata.name + environment: + KUBECONFIG: "{{ kubeconfig_path }}" + register: all_nodes + changed_when: false + + - name: Label worker nodes with node-role.kubernetes.io/worker + shell: | + node="{{ item }}" + # Get node labels as YAML and check for control-plane/master labels + node_yaml=$(kubectl get node "$node" -o yaml) + if echo "$node_yaml" | grep -q 
"node-role.kubernetes.io/control-plane:" || echo "$node_yaml" | grep -q "node-role.kubernetes.io/master:"; then + echo "Skipping control plane node: $node" + elif echo "$node_yaml" | grep -q "node-role.kubernetes.io/worker:"; then + echo "Node $node already has worker label" + else + kubectl label node "$node" node-role.kubernetes.io/worker= --overwrite + echo "Labeled $node as worker" + fi + environment: + KUBECONFIG: "{{ kubeconfig_path }}" + loop: "{{ all_nodes.stdout_lines }}" + ignore_errors: yes + register: label_result + + - name: Display labeling result + debug: + msg: "{{ label_result.stdout_lines | default(['No output']) }}" \ No newline at end of file diff --git a/scripts/ansible/ssh/hosts.txt b/scripts/ansible/ssh/hosts.txt new file mode 100644 index 0000000..c1cc49d --- /dev/null +++ b/scripts/ansible/ssh/hosts.txt @@ -0,0 +1,3 @@ +pc790.emulab.net +pc853.emulab.net +pc854.emulab.net diff --git a/scripts/ansible/ssh/keys.sh b/scripts/ansible/ssh/keys.sh new file mode 100755 index 0000000..6928fd2 --- /dev/null +++ b/scripts/ansible/ssh/keys.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +WORKERS_FILE="hosts.txt" + +# Read the file line by line, +# including the last line +# even if it doesn't have a newline +while IFS= read -r host_ip || [[ -n "$host_ip" ]]; do + echo "Generating SSH key on $host_ip..." + + ssh "$host_ip" "[ -f ~/.ssh/id_rsa.pub ] || (echo 'Creating SSH key on $host_ip'; ssh-keygen -t rsa -b 4096 -f ~/.ssh/id_rsa -N '' -q)" + + ssh "$host_ip" "[ -f ~/.ssh/id_rsa.pub ]" && echo "Key successfully created on $host_ip" || echo "Failed to create SSH key on $host_ip. Please check connection or permissions." +done < "$WORKERS_FILE" + +ALL_KEYS_FILE="all_worker_keys.tmp" +> "$ALL_KEYS_FILE" # Clear file if it exists + +while IFS= read -r host_ip || [[ -n "$host_ip" ]]; do + echo "Collecting public key from $host_ip..." + + # Retrieve the public key and append to the temporary file + ssh "$host_ip" "cat ~/.ssh/id_rsa.pub" >> "$ALL_KEYS_FILE" 2>/dev/null + if [[ $? -ne 0 ]]; then + echo "Warning: Could not collect key from $host_ip. SSH key might not have been generated." + else + echo "Collected key from $host_ip" + fi +done < "$WORKERS_FILE" + +while IFS= read -r host_ip || [[ -n "$host_ip" ]]; do + echo "Copying all collected public keys to $host_ip..." + + # Send the file with all public keys to each worker node's authorized_keys + ssh "$host_ip" "cat >> ~/.ssh/authorized_keys" < "$ALL_KEYS_FILE" && echo "Copied keys to $host_ip" +done < "$WORKERS_FILE" + +rm "$ALL_KEYS_FILE" +echo "SSH key setup complete. All workers should now be able to SSH into each other." diff --git a/scripts/ansible/tidb/README.md b/scripts/ansible/tidb/README.md new file mode 100644 index 0000000..49bdfc7 --- /dev/null +++ b/scripts/ansible/tidb/README.md @@ -0,0 +1,32 @@ +## TiDB Cluster and Operator Deployment with Ansible + +### Set up local PVs for TiDB cluster + +```shell +ansible-playbook -i ../cloudlab_inventory.yml tidb_pv_setup.yml +``` + +This will set up the local PVs for TiDB cluster to use in the self-managed Kubernetes cluster. +It will create a loop device with four directories mounted on it under `agent-ops/tidb/`. + +### Deploy TiDB Operator and Cluster + +```shell +ansible-playbook -i ../cloudlab_inventory.yml tidb_operator_cluster.yml +``` + +This will deploy the TiDB Operator and a TiDB cluster with dashboard and monitor. 
+ +### Check the Deployment Status + +Check the status of the TiDB Operator: +```shell +kubectl get pods --namespace tidb-admin -l app.kubernetes.io/instance=tidb-operator +``` + +Check the status of the TiDB cluster: +```shell +kubectl get po -n tidb-cluster +``` + +We should also consider the local Kind cluster setup, which is much easier (without the need to setup local PV and bind the storageclass): https://docs.pingcap.com/tidb-in-kubernetes/stable/get-started#step-2-deploy-tidb-operator diff --git a/scripts/ansible/tidb/local-volume-provisioner.yaml b/scripts/ansible/tidb/local-volume-provisioner.yaml new file mode 100644 index 0000000..fd7f351 --- /dev/null +++ b/scripts/ansible/tidb/local-volume-provisioner.yaml @@ -0,0 +1,167 @@ +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: "monitoring-storage" +provisioner: "kubernetes.io/no-provisioner" +volumeBindingMode: "WaitForFirstConsumer" +--- +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: "ssd-storage" +provisioner: "kubernetes.io/no-provisioner" +volumeBindingMode: "WaitForFirstConsumer" +--- +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: "shared-ssd-storage" +provisioner: "kubernetes.io/no-provisioner" +volumeBindingMode: "WaitForFirstConsumer" +--- +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: "backup-storage" +provisioner: "kubernetes.io/no-provisioner" +volumeBindingMode: "WaitForFirstConsumer" +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: local-provisioner-config + namespace: kube-system +data: + setPVOwnerRef: "true" + nodeLabelsForPV: | + - kubernetes.io/hostname + storageClassMap: | + ssd-storage: + hostDir: /users/yinfang/agent-ops/tidb/ssd + mountDir: /mnt/ssd + shared-ssd-storage: + hostDir: /users/yinfang/agent-ops/tidb/sharedssd + mountDir: /mnt/sharedssd + monitoring-storage: + hostDir: /users/yinfang/agent-ops/tidb/monitoring + mountDir: /mnt/monitoring + backup-storage: + hostDir: /users/yinfang/agent-ops/tidb/backup + mountDir: /mnt/backup +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: local-volume-provisioner + namespace: kube-system + labels: + app: local-volume-provisioner +spec: + selector: + matchLabels: + app: local-volume-provisioner + template: + metadata: + labels: + app: local-volume-provisioner + spec: + serviceAccountName: local-storage-admin + containers: + - image: "quay.io/external_storage/local-volume-provisioner:v2.3.4" + name: provisioner + securityContext: + privileged: true + env: + - name: MY_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: MY_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: JOB_CONTAINER_IMAGE + value: "quay.io/external_storage/local-volume-provisioner:v2.3.4" + resources: + requests: + cpu: 100m + memory: 100Mi + limits: + cpu: 100m + memory: 100Mi + volumeMounts: + - mountPath: /etc/provisioner/config + name: provisioner-config + readOnly: true + - mountPath: /mnt/ssd + name: local-ssd + mountPropagation: "HostToContainer" + - mountPath: /mnt/sharedssd + name: local-sharedssd + mountPropagation: "HostToContainer" + - mountPath: /mnt/backup + name: local-backup + mountPropagation: "HostToContainer" + - mountPath: /mnt/monitoring + name: local-monitoring + mountPropagation: "HostToContainer" + volumes: + - name: provisioner-config + configMap: + name: local-provisioner-config + - name: local-ssd + hostPath: + path: /users/yinfang/agent-ops/tidb/ssd + - name: local-sharedssd + hostPath: + path: 
/users/yinfang/agent-ops/tidb/sharedssd + - name: local-backup + hostPath: + path: /users/yinfang/agent-ops/tidb/backup + - name: local-monitoring + hostPath: + path: /users/yinfang/agent-ops/tidb/monitoring +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: local-storage-admin + namespace: kube-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: local-storage-provisioner-pv-binding + namespace: kube-system +subjects: +- kind: ServiceAccount + name: local-storage-admin + namespace: kube-system +roleRef: + kind: ClusterRole + name: system:persistent-volume-provisioner + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: local-storage-provisioner-node-clusterrole + namespace: kube-system +rules: +- apiGroups: [""] + resources: ["nodes"] + verbs: ["get"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: local-storage-provisioner-node-binding + namespace: kube-system +subjects: +- kind: ServiceAccount + name: local-storage-admin + namespace: kube-system +roleRef: + kind: ClusterRole + name: local-storage-provisioner-node-clusterrole + apiGroup: rbac.authorization.k8s.io diff --git a/scripts/ansible/tidb/tidb-cluster.yaml b/scripts/ansible/tidb/tidb-cluster.yaml new file mode 100644 index 0000000..1f7dde8 --- /dev/null +++ b/scripts/ansible/tidb/tidb-cluster.yaml @@ -0,0 +1,54 @@ +# IT IS NOT SUITABLE FOR PRODUCTION USE. +# This YAML describes a basic TiDB cluster with minimum resource requirements, +# which should be able to run in any Kubernetes cluster with storage support. +apiVersion: pingcap.com/v1alpha1 +kind: TidbCluster +metadata: + name: basic +spec: + version: v8.1.0 + timezone: UTC + pvReclaimPolicy: Retain + enableDynamicConfiguration: true + configUpdateStrategy: RollingUpdate + discovery: {} + helper: + image: alpine:3.16.0 + pd: + baseImage: pingcap/pd + maxFailoverCount: 0 + replicas: 1 + # if storageClassName is not set, the default Storage Class of the Kubernetes cluster will be used + storageClassName: shared-ssd-storage + requests: + storage: "1Gi" + config: {} + tikv: + baseImage: pingcap/tikv + maxFailoverCount: 0 + # If only 1 TiKV is deployed, the TiKV region leader + # cannot be transferred during upgrade, so we have + # to configure a short timeout + evictLeaderTimeout: 1m + replicas: 1 + # if storageClassName is not set, the default Storage Class of the Kubernetes cluster will be used + storageClassName: ssd-storage + requests: + storage: "1Gi" + config: + storage: + # In basic examples, we set this to avoid using too much storage. 
+ reserve-space: "0MB" + rocksdb: + # In basic examples, we set this to avoid the following error in some Kubernetes clusters: + # "the maximum number of open file descriptors is too small, got 1024, expect greater or equal to 82920" + max-open-files: 256 + raftdb: + max-open-files: 256 + tidb: + baseImage: pingcap/tidb + maxFailoverCount: 0 + replicas: 1 + service: + type: ClusterIP + config: {} diff --git a/scripts/ansible/tidb/tidb-dashboard.yaml b/scripts/ansible/tidb/tidb-dashboard.yaml new file mode 100644 index 0000000..8045534 --- /dev/null +++ b/scripts/ansible/tidb/tidb-dashboard.yaml @@ -0,0 +1,23 @@ +apiVersion: pingcap.com/v1alpha1 +kind: TidbDashboard +metadata: + name: basic +spec: + baseImage: pingcap/tidb-dashboard + version: latest + + ## tidb cluster to be monitored + ## ** now only support monitoring one tidb cluster ** + clusters: + - name: basic + + ## describes the compute resource requirements and limits. + ## Ref: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/ + requests: + # cpu: 1000m + # memory: 1Gi + storage: 1Gi + storageClassName: monitoring-storage + # limits: + # cpu: 2000m + # memory: 2Gi diff --git a/scripts/ansible/tidb/tidb-monitor.yaml b/scripts/ansible/tidb/tidb-monitor.yaml new file mode 100644 index 0000000..9e1259d --- /dev/null +++ b/scripts/ansible/tidb/tidb-monitor.yaml @@ -0,0 +1,24 @@ +apiVersion: pingcap.com/v1alpha1 +kind: TidbMonitor +metadata: + name: basic +spec: + replicas: 1 + clusters: + - name: basic + prometheus: + baseImage: prom/prometheus + version: v2.27.1 + grafana: + baseImage: grafana/grafana + version: 7.5.11 + initializer: + baseImage: pingcap/tidb-monitor-initializer + version: v8.1.0 + reloader: + baseImage: pingcap/tidb-monitor-reloader + version: v1.0.1 + prometheusReloader: + baseImage: quay.io/prometheus-operator/prometheus-config-reloader + version: v0.49.0 + imagePullPolicy: IfNotPresent diff --git a/scripts/ansible/tidb/tidb-operator.yaml b/scripts/ansible/tidb/tidb-operator.yaml new file mode 100644 index 0000000..c190fb1 --- /dev/null +++ b/scripts/ansible/tidb/tidb-operator.yaml @@ -0,0 +1,306 @@ +# Default values for tidb-operator + +# clusterScoped is whether tidb-operator should manage kubernetes cluster wide tidb clusters +# Also see rbac.create, controllerManager.serviceAccount, scheduler.create and controllerManager.clusterPermissions. +clusterScoped: true + +# Also see clusterScoped and controllerManager.serviceAccount +rbac: + create: true + +# timezone is the default system timzone +timezone: UTC + +# operatorImage is TiDB Operator image +operatorImage: pingcap/tidb-operator:v1.6.0 +imagePullPolicy: IfNotPresent +# imagePullSecrets: [] + +# tidbBackupManagerImage is tidb backup manager image +tidbBackupManagerImage: pingcap/tidb-backup-manager:v1.6.0 + +# +# Enable or disable tidb-operator features: +# +# AdvancedStatefulSet (default: false) +# If enabled, tidb-operator will use AdvancedStatefulSet to manage pods +# instead of Kubernetes StatefulSet. +# It's ok to turn it on if this feature is not enabled. However it's not ok +# to turn it off when the tidb-operator already uses AdvancedStatefulSet to +# manage pods. This is in GA phase. +# Ref: https://docs.pingcap.com/tidb-in-kubernetes/stable/advanced-statefulset +# +# VolumeModifying (default false) +# If enabled, tidb-operator support to increase the size or performance of volumes +# for specific volume provisioner. This is in GA phase for AWS EBS. 
+# +# VolumeReplacing (default false) +# If enabled, tidb-operator support generic volume changes (size, storageclass & number of volumes) +# by deleting old pod + pvc completely and replacing with new pods/PVCs. +# pd, tikv, tidb supported. If enabled, takes precedence over volume resizing & modifying. +# This works by deleting pod+volumes one at a time (similar to scale-in), and recreating it. +# For PD & TiKV this will first create one new spare replica to take the load before deleting existing ones +# Safely deleting a volume and replacing them can take a long time (Especially TiKV to move regions). +# This is in Alpha phase. +# +features: [] +# - AdvancedStatefulSet=false +# - VolumeModifying=false +# - VolumeReplacing=false + +appendReleaseSuffix: false + +controllerManager: + create: true + # With rbac.create=false, the user is responsible for creating this account + # With rbac.create=true, this service account will be created + # Also see rbac.create and clusterScoped + serviceAccount: tidb-controller-manager + + # clusterPermissions are some cluster scoped permissions that will be used even if `clusterScoped: false`. + # the default value of these fields is `true`. if you want them to be `false`, you MUST set them to `false` explicitly. + clusterPermissions: + nodes: true + persistentvolumes: true + storageclasses: true + + logLevel: 2 + replicas: 1 + resources: + requests: + cpu: 80m + memory: 50Mi +# # REF: https://kubernetes.io/docs/concepts/configuration/pod-priority-preemption/ +# priorityClassName: system-cluster-critical +# + + # REF: https://pkg.go.dev/k8s.io/client-go/tools/leaderelection#LeaderElectionConfig + ## leaderLeaseDuration is the duration that non-leader candidates will wait to force acquire leadership + # leaderLeaseDuration: 15s + ## leaderRenewDeadline is the duration that the acting master will retry refreshing leadership before giving up + # leaderRenewDeadline: 10s + ## leaderRetryPeriod is the duration the LeaderElector clients should wait between tries of actions + # leaderRetryPeriod: 2s + ## leaderResourceLock is the type of resource object that will be used for locking during leader election + ## If using "endpoints" before and want to migrate to "leases", you should migrate to "endpointsleases" first + # leaderResourceLock: "leases" + + ## number of workers that are allowed to sync concurrently. default 5 + # workers: 5 + + # autoFailover is whether tidb-operator should auto failover when failure occurs + autoFailover: true + # pd failover period default(5m) + pdFailoverPeriod: 5m + # tikv failover period default(5m) + tikvFailoverPeriod: 5m + # tidb failover period default(5m) + tidbFailoverPeriod: 5m + # tiflash failover period default(5m) + tiflashFailoverPeriod: 5m + # dm-master failover period default(5m) + dmMasterFailoverPeriod: 5m + # dm-worker failover period default(5m) + dmWorkerFailoverPeriod: 5m + # detectNodeFailure tells whether tidb-operator should auto detect k8s node failures for recovery of failure pods. Currently it is experimental + detectNodeFailure: false + # podHardRecoveryPeriod is the time limit after which a failure pod is forcefully marked as k8s node failure. To be set if detectNodeFailure is true default (24h) + # podHardRecoveryPeriod: 24h + ## affinity defines pod scheduling rules,affinity default settings is empty. 
+ ## please read the affinity document before set your scheduling rule: + ## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity + affinity: {} + ## nodeSelector ensure pods only assigning to nodes which have each of the indicated key-value pairs as labels + ## ref:https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector + nodeSelector: {} + ## Tolerations are applied to pods, and allow pods to schedule onto nodes with matching taints. + ## refer to https://kubernetes.io/docs/concepts/configuration/taint-and-toleration + tolerations: [] + # - key: node-role + # operator: Equal + # value: tidb-operator + # effect: "NoSchedule" + ## Selector (label query) to filter on, make sure that this controller manager only manages the custom resources that match the labels + ## refer to https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#equality-based-requirement + selector: [] + # - canary-release=v1 + # - k1==v1 + # - k2!=v2 + ## Env define environments for the controller manager. + ## NOTE that the following env names is reserved: + ## - NAMESPACE + ## - TZ + ## - HELM_RELEASE + env: [] + # - name: AWS_REGION + # value: us-west-2 + # SecurityContext is security config of this component, it will set template.spec.securityContext + # Refer to https://kubernetes.io/docs/tasks/configure-pod-container/security-context + securityContext: {} + # runAsUser: 1000 + # runAsGroup: 2000 + # fsGroup: 2000 + # PodAnnotations will set template.metadata.annotations + # Refer to https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/ + podAnnotations: {} + ## KubeClientQPS indicates the maximum QPS to the kubenetes API server from client. + # kubeClientQPS: 5 + ## Maximum burst for throttle. + # kubeClientBurst: 10 + +scheduler: + create: false + # With rbac.create=false, the user is responsible for creating this account + # With rbac.create=true, this service account will be created + # Also see rbac.create and clusterScoped + serviceAccount: tidb-scheduler + logLevel: 2 + replicas: 1 + schedulerName: tidb-scheduler + resources: + limits: + cpu: 250m + memory: 150Mi + requests: + cpu: 80m + memory: 50Mi + kubeSchedulerImageName: registry.k8s.io/kube-scheduler + # This will default to matching your kubernetes version + # kubeSchedulerImageTag: + ## affinity defines pod scheduling rules,affinity default settings is empty. + ## please read the affinity document before set your scheduling rule: + ## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity + affinity: {} + ## nodeSelector ensure pods only assigning to nodes which have each of the indicated key-value pairs as labels + ## ref:https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector + nodeSelector: {} + ## Tolerations are applied to pods, and allow pods to schedule onto nodes with matching taints. 
+ ## refer to https://kubernetes.io/docs/concepts/configuration/taint-and-toleration + tolerations: [] + # - key: node-role + # operator: Equal + # value: tidb-operator + # effect: "NoSchedule" + # + # SecurityContext is security config of this component, it will set template.spec.securityContext + # Refer to https://kubernetes.io/docs/tasks/configure-pod-container/security-context + securityContext: {} + # runAsUser: 1000 + # runAsGroup: 2000 + # fsGroup: 2000 + # PodAnnotations will set template.metadata.annotations + # Refer to https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/ + podAnnotations: {} + + # additional annotations for the configmap, mainly to prevent spinnaker versioning the cm + configmapAnnotations: {} + +# When AdvancedStatefulSet feature is enabled, you must install +# AdvancedStatefulSet controller. +# Note that AdvancedStatefulSet CRD must be installed manually via the following +# command: +# kubectl apply -f manifests/advanced-statefulset-crd.v1.yaml +advancedStatefulset: + create: false + ## resourceLock indicates the type of resource object that will be used for locking during leader election. + ## If using "endpoints" before and want to migrate to "leases", you should migrate to "endpointsleases" first. + # resourceLock: "leases" + image: pingcap/advanced-statefulset:v0.4.0 + imagePullPolicy: IfNotPresent + serviceAccount: advanced-statefulset-controller + logLevel: 4 + replicas: 1 + resources: + limits: + cpu: 500m + memory: 300Mi + requests: + cpu: 200m + memory: 50Mi + ## affinity defines pod scheduling rules,affinity default settings is empty. + ## please read the affinity document before set your scheduling rule: + ## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity + affinity: {} + ## nodeSelector ensure pods only assigning to nodes which have each of the indicated key-value pairs as labels + ## ref:https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector + nodeSelector: {} + ## Tolerations are applied to pods, and allow pods to schedule onto nodes with matching taints. 
+ ## refer to https://kubernetes.io/docs/concepts/configuration/taint-and-toleration + tolerations: [] + # - key: node-role + # operator: Equal + # value: tidb-operator + # effect: "NoSchedule" + # + # SecurityContext is security config of this component, it will set template.spec.securityContext + # Refer to https://kubernetes.io/docs/tasks/configure-pod-container/security-context + securityContext: {} + # runAsUser: 1000 + # runAsGroup: 2000 + # fsGroup: 2000 + +admissionWebhook: + create: false + replicas: 1 + serviceAccount: tidb-admission-webhook + logLevel: 2 + rbac: + create: true + ## validation webhook would check the given request for the specific resource and operation + validation: + ## statefulsets hook would check requests for updating tidbcluster's statefulsets + ## If enabled it, the statefulsets of tidbcluseter would update in partition by tidbcluster's annotation + statefulSets: false + ## validating hook validates the correctness of the resources under pingcap.com group + pingcapResources: false + ## mutation webhook would mutate the given request for the specific resource and operation + mutation: + ## defaulting hook set default values for the the resources under pingcap.com group + pingcapResources: true + ## failurePolicy are applied to ValidatingWebhookConfiguration which affect tidb-admission-webhook + ## refer to https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/#failure-policy + failurePolicy: + ## the validation webhook would check the request of the given resources. + validation: Fail + ## the mutation webhook would mutate the request of the given resources. + mutation: Fail + ## tidb-admission-webhook deployed as kubernetes apiservice server + ## refer to https://github.com/openshift/generic-admission-server + apiservice: + ## apiservice config + ## refer to https://kubernetes.io/docs/tasks/access-kubernetes-api/configure-aggregation-layer/#contacting-the-extension-apiserver + insecureSkipTLSVerify: true + ## The Secret includes the TLS ca, cert and key for the `tidb-admission-webook..svc` Service. + ## If insecureSkipTLSVerify is true, this would be ignored. + ## You can create the tls secret by: + ## kubectl create secret generic --namespace= --from-file=tls.crt= --from-file=tls.key= --from-file=ca.crt= + tlsSecret: "" + ## The caBundle for the webhook apiservice, you could get it by the secret you created previously: + ## kubectl get secret --namespace= -o=jsonpath='{.data.ca\.crt}' + caBundle: "" + ## certProvider indicate the key and cert for the webhook configuration to communicate with `kubernetes.default` service. + ## If your kube-apiserver's version >= 1.13.0, you can leave cabundle empty and the kube-apiserver + ## would trust the roots on the apiserver. 
+ ## refer to https://github.com/kubernetes/api/blob/master/admissionregistration/v1/types.go#L529 + ## or you can get the cabundle by: + ## kubectl get configmap -n kube-system extension-apiserver-authentication -o=jsonpath='{.data.client-ca-file}' | base64 | tr -d '\n' + cabundle: "" + # SecurityContext is security config of this component, it will set template.spec.securityContext + # Refer to https://kubernetes.io/docs/tasks/configure-pod-container/security-context + securityContext: {} + # runAsUser: 1000 + # runAsGroup: 2000 + # fsGroup: 2000 + ## nodeSelector ensures that pods are only scheduled to nodes that have each of the indicated key-value pairs as labels + ## ref:https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector + nodeSelector: {} + ## Tolerations are applied to pods, and allow pods to schedule onto nodes with matching taints. + ## refer to https://kubernetes.io/docs/concepts/configuration/taint-and-toleration + tolerations: [] + # - key: node-role + # operator: Equal + # value: tidb-operator + # effect: "NoSchedule" + # + + diff --git a/scripts/ansible/tidb/tidb_operator_cluster.yml b/scripts/ansible/tidb/tidb_operator_cluster.yml new file mode 100644 index 0000000..4aed79b --- /dev/null +++ b/scripts/ansible/tidb/tidb_operator_cluster.yml @@ -0,0 +1,107 @@ +--- +- name: Setup TiDB Cluster and Operator + hosts: control_node + become: true + vars: + kubeconfig_path: "/users/{{ k8s_user }}/.kube/config" + tasks: + - name: Check if CRDs already exist + shell: kubectl get crd backups.pingcap.com + register: crd_exists + failed_when: false + changed_when: false + environment: + KUBECONFIG: "{{ kubeconfig_path }}" + + - name: Delete existing CRDs if they exist + shell: kubectl delete -f https://raw.githubusercontent.com/pingcap/tidb-operator/v1.6.0/manifests/crd.yaml + when: crd_exists.rc == 0 + ignore_errors: true + environment: + KUBECONFIG: "{{ kubeconfig_path }}" + + - name: Create CRDs for TiDB Operator + shell: kubectl create -f https://raw.githubusercontent.com/pingcap/tidb-operator/v1.6.0/manifests/crd.yaml + register: crd_create + environment: + KUBECONFIG: "{{ kubeconfig_path }}" + + - name: Check CRDs creation result + fail: + msg: "CRDs creation failed. Ensure Kubernetes is configured properly." + when: crd_create.failed + + - name: Create namespace for TiDB Operator + shell: kubectl create namespace tidb-admin + register: tidb_admin_ns + ignore_errors: true + environment: + KUBECONFIG: "{{ kubeconfig_path }}" + + - name: Create namespace for TiDB Cluster + shell: kubectl create namespace tidb-cluster + register: tidb_cluster_ns + ignore_errors: true + environment: + KUBECONFIG: "{{ kubeconfig_path }}" + + - name: Install TiDB Operator with Helm + shell: | + helm repo add pingcap https://charts.pingcap.org + helm repo update + helm upgrade --install tidb-operator pingcap/tidb-operator \ + --namespace=tidb-admin --version=v1.6.0 -f {{ playbook_dir }}/tidb-operator.yaml + register: helm_install + environment: + KUBECONFIG: "{{ kubeconfig_path }}" + + - name: Check Helm installation result + fail: + msg: "Helm installation of TiDB Operator failed." 
+ when: helm_install.failed + + - name: Wait for TiDB Operator pods to be ready + shell: kubectl wait --namespace tidb-admin \ + --for=condition=Ready pod \ + --selector=app.kubernetes.io/name=tidb-operator --timeout=300s + register: wait_tidb_operator + environment: + KUBECONFIG: "{{ kubeconfig_path }}" + + - name: Check if TiDB Operator pods are ready + fail: + msg: "TiDB Operator pods are not ready." + when: wait_tidb_operator.failed + + - name: Apply TidbCluster configuration + shell: kubectl -n tidb-cluster apply -f {{ playbook_dir }}/tidb-cluster.yaml + register: tidb_cluster_apply + environment: + KUBECONFIG: "{{ kubeconfig_path }}" + + - name: Check TidbCluster configuration apply result + fail: + msg: "Failed to apply TidbCluster configuration." + when: tidb_cluster_apply.failed + + - name: Apply TidbDashboard configuration + shell: kubectl -n tidb-cluster apply -f {{ playbook_dir }}/tidb-dashboard.yaml + register: tidb_dashboard_apply + environment: + KUBECONFIG: "{{ kubeconfig_path }}" + + - name: Check TidbDashboard configuration apply result + fail: + msg: "Failed to apply TidbDashboard configuration." + when: tidb_dashboard_apply.failed + + - name: Apply TidbMonitor configuration + shell: kubectl -n tidb-cluster apply -f {{ playbook_dir }}/tidb-monitor.yaml + register: tidb_monitor_apply + environment: + KUBECONFIG: "{{ kubeconfig_path }}" + + - name: Check TidbMonitor configuration apply result + fail: + msg: "Failed to apply TidbMonitor configuration." + when: tidb_monitor_apply.failed diff --git a/scripts/ansible/tidb/tidb_pv_setup.yml b/scripts/ansible/tidb/tidb_pv_setup.yml new file mode 100644 index 0000000..eddf455 --- /dev/null +++ b/scripts/ansible/tidb/tidb_pv_setup.yml @@ -0,0 +1,173 @@ +--- +- name: Configure Local Persistent Volumes for TiDB + hosts: all + become: true + tasks: + + # Step 1: Install necessary tools + - name: Ensure necessary packages are installed + apt: + name: + - e2fsprogs # For mkfs.ext4 + - util-linux # For losetup + state: present + update_cache: true + + # Step 2: Create a single loopback device for SSD + - name: Create a loopback device for SSD + shell: | + dd if=/dev/zero of=/tmp/ssd.img bs=1M count=2048 + losetup -fP /tmp/ssd.img + # when: inventory_hostname in groups['worker_nodes'] # Apply only to worker nodes + + - name: Get loopback device for SSD + shell: losetup -a | grep /tmp/ssd.img + register: ssd_loop_device + # when: inventory_hostname in groups['worker_nodes'] + + - name: Format SSD loopback device as ext4 + shell: mkfs.ext4 {{ ssd_loop_device.stdout.split(":")[0] }} + # when: inventory_hostname in groups['worker_nodes'] + + - name: Mount SSD device + mount: + path: /users/{{ k8s_user }}/agent-ops/tidb/ssd + src: "{{ ssd_loop_device.stdout.split(':')[0] }}" + fstype: ext4 + opts: defaults + state: mounted + # when: inventory_hostname in groups['worker_nodes'] + + # Step 3: Create subdirectories and bind-mount them + - name: Create subdirectories for sharedssd, monitoring, and backup + shell: mkdir -p /users/{{ k8s_user }}/agent-ops/tidb/ssd/{{ item }} + loop: + - sharedssd_vol1 + - sharedssd_vol2 + - sharedssd_vol3 + - sharedssd_vol4 + - sharedssd_vol5 + - sharedssd_vol6 + - sharedssd_vol7 + - sharedssd_vol8 + - sharedssd_vol9 + - sharedssd_vol10 + - monitoring_vol1 + - monitoring_vol2 + - monitoring_vol3 + - monitoring_vol4 + - monitoring_vol5 + - monitoring_vol6 + - monitoring_vol7 + - monitoring_vol8 + - monitoring_vol9 + - monitoring_vol10 + - backup_vol1 + - backup_vol2 + - backup_vol3 + - backup_vol4 + - backup_vol5 + 
- backup_vol6 + - backup_vol7 + - backup_vol8 + - backup_vol9 + - backup_vol10 + # when: inventory_hostname in groups['worker_nodes'] + + - name: Create target directories for bind mounts + file: + path: /users/{{ k8s_user }}/agent-ops/tidb/{{ item.split('_')[0] }}/{{ item }} + state: directory + mode: '0755' + loop: + - sharedssd_vol1 + - sharedssd_vol2 + - sharedssd_vol3 + - sharedssd_vol4 + - sharedssd_vol5 + - sharedssd_vol6 + - sharedssd_vol7 + - sharedssd_vol8 + - sharedssd_vol9 + - sharedssd_vol10 + - monitoring_vol1 + - monitoring_vol2 + - monitoring_vol3 + - monitoring_vol4 + - monitoring_vol5 + - monitoring_vol6 + - monitoring_vol7 + - monitoring_vol8 + - monitoring_vol9 + - monitoring_vol10 + - backup_vol1 + - backup_vol2 + - backup_vol3 + - backup_vol4 + - backup_vol5 + - backup_vol6 + - backup_vol7 + - backup_vol8 + - backup_vol9 + - backup_vol10 + + - name: Bind-mount subdirectories + shell: mount --bind /users/{{ k8s_user }}/agent-ops/tidb/ssd/{{ item }} /users/{{ k8s_user }}/agent-ops/tidb/{{ item.split('_')[0] }}/{{ item }} + loop: + - sharedssd_vol1 + - sharedssd_vol2 + - sharedssd_vol3 + - sharedssd_vol4 + - sharedssd_vol5 + - sharedssd_vol6 + - sharedssd_vol7 + - sharedssd_vol8 + - sharedssd_vol9 + - sharedssd_vol10 + - monitoring_vol1 + - monitoring_vol2 + - monitoring_vol3 + - monitoring_vol4 + - monitoring_vol5 + - monitoring_vol6 + - monitoring_vol7 + - monitoring_vol8 + - monitoring_vol9 + - monitoring_vol10 + - backup_vol1 + - backup_vol2 + - backup_vol3 + - backup_vol4 + - backup_vol5 + - backup_vol6 + - backup_vol7 + - backup_vol8 + - backup_vol9 + - backup_vol10 + # when: inventory_hostname in groups['worker_nodes'] + +####################################################### +- name: Apply Kubernetes Configuration for Provisioner + hosts: control_nodes + become: true + vars: + kubeconfig_path: "/users/{{ k8s_user }}/.kube/config" + tasks: + + # Step 5: Deploy the local-volume-provisioner + - name: Apply the local-volume-provisioner YAML configuration + shell: kubectl apply -f local-volume-provisioner.yaml + environment: + KUBECONFIG: "{{ kubeconfig_path }}" + args: + chdir: "{{ playbook_dir }}" + + # Step 6: Verify the setup + - name: Verify Persistent Volumes + shell: kubectl get pv + environment: + KUBECONFIG: "{{ kubeconfig_path }}" + register: pv_output + + - debug: + var: pv_output.stdout diff --git a/scripts/geni_lib/README.md b/scripts/geni_lib/README.md new file mode 100644 index 0000000..5285529 --- /dev/null +++ b/scripts/geni_lib/README.md @@ -0,0 +1,180 @@ +# Getting CloudLab Credentials + +1. Go to https://www.cloudlab.us/ +2. Login with your cloudlab account +3. On the top right corner, click on your username, and then click on "Download Credentials" +4. This will take you to a page with a button to download the credentials. Click on it. +5. This will download a file called `cloudlab.pem`. + +The `cloudlab.pem` contains the encrypted private key to your cloudlab account and ssl certificate. You need to decrypt it before using it. + +## Install OpenSSL (if not already installed) + +For Ubuntu/Debian: +```bash +sudo apt install openssl +``` + +For macOS: +```bash +brew install openssl +``` + +## Decrypt the CloudLab credentials + +```bash +openssl rsa -in cloudlab.pem -out cloudlab_decrypted.pem +``` + +When prompted for a password, enter your CloudLab account password (the same one you use to login to the CloudLab website). +This will create a new file `cloudlab_decrypted.pem` containing your decrypted private key. 
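+
+Optionally, you can sanity-check the decrypted key before using it. This is just a quick verification step (not required by CloudLab); it relies on the standard `openssl rsa -check` option:
+
+```bash
+# Verify that cloudlab_decrypted.pem is a well-formed, unencrypted RSA key
+openssl rsa -in cloudlab_decrypted.pem -check -noout
+# A healthy key prints "RSA key ok" and does not prompt for a password
+```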
+The SSL certificate remains in the original `cloudlab.pem` file.
+
+# About the `geni-lib` library
+
+The `geni-lib` library is a Python library for interacting with the GENI (Global Environment for Network Innovations) API. It provides a Python interface to manage slices and slivers on GENI-enabled resources. The original library can be found [here](https://gitlab.flux.utah.edu/emulab/geni-lib). For this project, we have made some modifications to make it Python 3 compatible, since parts of the original library cause issues when run under Python 3.
+
+The modified library can be found in the `scripts/geni-lib/mod/geni-lib-xlab` directory. It will be automatically installed when you run `uv sync` to install the dependencies.
+
+## Building a context definition for CloudLab
+
+To build a context definition, you'll need:
+- Your CloudLab certificate (`cloudlab.pem`)
+- Your decrypted private key (`cloudlab_decrypted.pem`)
+- Your SSH public key
+- Your project name (use lowercase to avoid Slice URN conflicts)
+
+Use the following command format:
+```bash
+build-context --type cloudlab --cert <certificate-file> --key <decrypted-key-file> --pubkey <ssh-public-key> --project <project-name>
+```
+
+Example:
+```bash
+build-context --type cloudlab --cert cloudlab.pem --key cloudlab_decrypted.pem --pubkey ~/.ssh/id_ed25519.pub --project aiopslab
+```
+
+# How GENI Works
+
+GENI (Global Environment for Network Innovations) and CloudLab use two core concepts for managing experimental resources:
+
+## Understanding Slices and Slivers
+
+### Slice
+- A slice is a logical container that groups resources (nodes, links) for a specific experiment
+- Think of it as a virtual workspace for organizing resources
+- Has an expiration time that can be renewed
+
+### Sliver
+- A sliver is a specific allocated resource (node, link, VM) within a slice
+- Each sliver exists at a particular physical site (aggregate)
+- Example: a compute node at Wisconsin CloudLab
+- Slivers include details like:
+  - Node specifications (e.g., c220g5)
+  - IP addresses (public and private)
+  - SSH access information
+- A sliver's expiration time cannot exceed its parent slice's expiration time
+
+## Understanding RSpec Files
+
+RSpec files define the resources and their configurations for a slice. We can get them in two ways:
+1. Modify the `generate_rspec.py` script to programmatically define our resources and generate the corresponding RSpec file.
+2. Go to CloudLab and copy the RSpec of a profile we want to use. Store the RSpec files in the `scripts/geni-lib/rspecs` directory.
+
+## Using the GENI Manager
+
+The `genictl.py` script provides a CLI to manage both slices and slivers. A complete example workflow combining these commands is shown at the end of this README.
+
+### Available Commands
+
+1. **create-slice**
+   - Creates a new slice container for your experiment
+   ```bash
+   python3 genictl.py create-slice <slice_name> [--hours HOURS] [--description DESCRIPTION]
+   ```
+
+2. **create-sliver**
+   - Allocates resources at a specific site
+   - Saves login information to `<slice_name>.login.info.txt`
+   ```bash
+   python3 genictl.py create-sliver <slice_name> <rspec_file> --site {utah,clemson,wisconsin}
+   ```
+
+3. **sliver-status**
+   - Checks the current status of allocated resources
+   ```bash
+   python3 genictl.py sliver-status <slice_name> --site {utah,clemson,wisconsin}
+   ```
+
+4. **renew-slice**
+   - Extends the expiration time of a slice
+   ```bash
+   python3 genictl.py renew-slice <slice_name> [--hours HOURS]
+   ```
+
+5. **renew-sliver**
+   - Extends the expiration time of resources at a specific site
+   - Note: Set the sliver expiration slightly less than the slice expiration (e.g., 2.9h instead of 3h) to account for command execution delays
+   ```bash
+   python3 genictl.py renew-sliver <slice_name> [--hours HOURS] --site {utah,clemson,wisconsin}
+   ```
+
+6. **list-slices**
+   - Shows all active slices and their details
+   ```bash
+   python3 genictl.py list-slices
+   ```
+
+7. **sliver-spec**
+   - Shows detailed specifications of allocated resources
+   - Includes node specs, IP addresses, and network info
+   ```bash
+   python3 genictl.py sliver-spec <slice_name> --site {utah,clemson,wisconsin}
+   ```
+
+8. **delete-sliver**
+   - Removes allocated resources from a slice
+   ```bash
+   python3 genictl.py delete-sliver <slice_name> --site {utah,clemson,wisconsin}
+   ```
+
+9. **get-hardware-info**
+   - Gets hardware information from CloudLab. This is useful for checking which node types are available at the different sites.
+   ```bash
+   python3 genictl.py get-hardware-info
+   ```
+
+10. **create-experiment**
+    - Creates a quick experiment with the desired hardware type, number of nodes, OS type, and duration
+    ```bash
+    python3 genictl.py create-experiment [--hardware-type HARDWARE_TYPE] [--nodes NODES] [--duration DURATION] [--os-type OS_TYPE] [--ssh-user SSH_USER] [--ssh-key SSH_KEY] [--k8s] [--pod-network-cidr POD_NETWORK_CIDR] [--deploy-sregym] [--deploy-key DEPLOY_KEY]
+    ```
+    Options:
+    - `--hardware-type`: Hardware type (default: c220g5)
+    - `--nodes`: Number of nodes (default: 3)
+    - `--duration`: Duration in hours (default: 1)
+    - `--os-type`: OS image (default: UBUNTU22-64-STD)
+    - `--ssh-user`: SSH username
+    - `--ssh-key`: SSH private key file
+    - `--k8s`: boolean flag to bootstrap Kubernetes after the sliver is ready
+    - `--pod-network-cidr`: Calico pod CIDR (default: 192.168.0.0/16)
+    - `--deploy-sregym`: boolean flag to deploy SREGym after the K8s cluster is ready
+    - `--deploy-key`: Path to the SSH deploy key for the SREGym private repo
+
+11. **renew-experiment**
+    - Renews both the slice and the sliver for an experiment
+    ```bash
+    python3 genictl.py renew-experiment <slice_name> --site {utah,clemson,wisconsin} [--hours HOURS]
+    ```
+
+## Quick Test
+
+Under the `tests/geni-lib/` directory, there is a script called `test_experiment_creation.py` that can be used to create a quick experiment.
+
+```bash
+cd tests/geni-lib
+python3 test_experiment_creation.py
+```
+
+This will create a 3-node experiment with c220g5 nodes at the Wisconsin site for 1 hour.
+The login info will be saved to a file called `.login.info.txt`.
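+
+## Example End-to-End Workflow
+
+As a rough sketch of how the commands above fit together (the slice name `exp-123456` is a placeholder for whatever name `create-experiment` prints, and the SSH user, key paths, and site are examples you should replace with your own):
+
+```bash
+# See which hardware types are free across the CloudLab sites
+python3 genictl.py get-hardware-info
+
+# Allocate a 3-node c220g5 experiment, bootstrap Kubernetes, and deploy SREGym
+# (the site is chosen automatically based on availability)
+python3 genictl.py create-experiment --hardware-type c220g5 --nodes 3 --duration 3 \
+    --os-type UBUNTU22-64-STD --ssh-user alice --ssh-key ~/.ssh/id_ed25519 \
+    --k8s --deploy-sregym --deploy-key ~/.ssh/sregym_deploy
+
+# Inspect the allocated nodes (use the slice name printed by create-experiment,
+# and replace "wisconsin" with the site it actually chose)
+python3 genictl.py sliver-spec exp-123456 --site wisconsin
+
+# Extend the experiment before it expires
+python3 genictl.py renew-experiment exp-123456 --site wisconsin --hours 2
+
+# Tear everything down when finished
+python3 genictl.py delete-sliver exp-123456 --site wisconsin
+```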
diff --git a/scripts/geni_lib/__init__.py b/scripts/geni_lib/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scripts/geni_lib/cluster_setup.py b/scripts/geni_lib/cluster_setup.py new file mode 100644 index 0000000..1a6b66f --- /dev/null +++ b/scripts/geni_lib/cluster_setup.py @@ -0,0 +1,357 @@ +#!/usr/bin/env python3 + +from __future__ import annotations + +import pathlib +import subprocess +import sys +import time + +import yaml + +from scripts.geni_lib.remote import RemoteExecutor + +CFG_PATH = pathlib.Path("config.yml") + + +def load_cfg() -> dict: + return yaml.safe_load(CFG_PATH.read_text()) + + +def nodes_reachable(cloud: dict, verbose: bool = True) -> bool: + """Check if all nodes are reachable with better error handling and retries""" + print(f"Checking {len(cloud['nodes'])} nodes for SSH connectivity...") + + for i, host in enumerate(cloud["nodes"], 1): + print(f" [{i}/{len(cloud['nodes'])}] Testing {host}...", end=" ") + + max_retries = 3 + success = False + + for retry in range(max_retries): + try: + executor = RemoteExecutor(host, cloud["ssh_user"], cloud.get("ssh_key")) + rc, stdout, stderr = executor.exec("echo 'SSH test successful'") + executor.close() + + if rc == 0: + print("✅") + success = True + break + else: + print(f"❌ (command failed: rc={rc})") + if verbose and retry == max_retries - 1: + print(f" stdout: {stdout.strip()}") + print(f" stderr: {stderr.strip()}") + + except Exception as e: + if retry < max_retries - 1: + print(".", end="") + time.sleep(5) + else: + print(f"❌ ({type(e).__name__}: {str(e)[:80]}...)") + if verbose: + print(f" Full error: {e}") + + if not success: + return False + + print("✅ All nodes reachable!") + return True + + +def install_k8s_components(ex: RemoteExecutor) -> None: + cmds: list[str] = [ + "sudo swapoff -a", + "sudo sed -i '/ swap / s/^/#/' /etc/fstab", + "sudo modprobe br_netfilter", + "sudo modprobe overlay", + "echo 'net.bridge.bridge-nf-call-iptables=1' | sudo tee /etc/sysctl.d/k8s.conf", + "echo 'net.bridge.bridge-nf-call-ip6tables=1' | sudo tee -a /etc/sysctl.d/k8s.conf", + "echo 'net.ipv4.ip_forward=1' | sudo tee -a /etc/sysctl.d/k8s.conf", + "sudo sysctl --system", + "sudo apt-get update -qq", + "sudo apt-get install -yq apt-transport-https ca-certificates curl \ + gnupg lsb-release jq", + "sudo rm -f /etc/apt/sources.list.d/kubernetes.list", + "sudo mkdir -p /etc/apt/keyrings", + "sudo rm -f /etc/apt/keyrings/kubernetes-archive-keyring.gpg", + "curl -fsSL https://pkgs.k8s.io/core:/stable:/v1.28/deb/Release.key -o /tmp/k8s-Release.key", + "sudo gpg --batch --yes --dearmor -o /etc/apt/keyrings/kubernetes-archive-keyring.gpg /tmp/k8s-Release.key", + "rm /tmp/k8s-Release.key", + "echo 'deb [signed-by=/etc/apt/keyrings/kubernetes-archive-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v1.28/deb/ /' | sudo tee /etc/apt/sources.list.d/kubernetes.list", + "sudo apt-get update -qq", + "sudo apt-get install -yq containerd kubelet kubeadm kubectl", + "sudo apt-mark hold kubelet kubeadm kubectl", + "sudo mkdir -p /etc/containerd", + "sudo containerd config default | sudo tee /etc/containerd/config.toml", + "sudo sed -i 's/SystemdCgroup = false/SystemdCgroup = true/' /etc/containerd/config.toml", + "sudo systemctl restart containerd", + "sudo systemctl enable containerd", + "sudo update-alternatives --set iptables /usr/sbin/iptables-legacy", + "sudo kubeadm config images pull --kubernetes-version $(kubeadm version -o short)", + ] + for cmd in cmds: + print(f" Running: {cmd[:60]}...") + rc, stdout, err = ex.exec(cmd) # 
Removed timeout parameter + if rc != 0: + print(f" Failed command: {cmd}") + print(f" Error: {err.strip()}") + raise RuntimeError(f"[{ex.host}] `{cmd}` failed:\n{err.strip()}") + + +def _wait_for_api_server(ex: RemoteExecutor, timeout: int = 300) -> None: + start = time.time() + print(" waiting for API server to be ready…") + while time.time() - start < timeout: + ok, _, _ = ex.exec("kubectl get nodes --request-timeout=5s >/dev/null 2>&1") + if ok == 0: + print(" API server is ready!") + return + time.sleep(5) + raise RuntimeError("API server not ready after 300 s") + + +def _wait_controller_ready(ex: RemoteExecutor, timeout: int = 600) -> None: + start = time.time() + print("waiting for controller manager to be ready…") + while time.time() - start < timeout: + ok, out, _ = ex.exec( + "kubectl get pod -n kube-system -l component=kube-controller-manager -o jsonpath='{.items[0].status.conditions[?(@.type==\"Ready\")].status}' 2>/dev/null" + ) + if ok == 0 and out.strip("'\" ") == "True": + print("controller manager pod is Ready!") + return + ok, _, _ = ex.exec( + "kubectl get secret -n kube-system -o json 2>/dev/null | jq -e '.items[] | select(.type==\"kubernetes.io/service-account-token\")' >/dev/null 2>&1" + ) + if ok == 0: + print("ServiceAccount token found!") + return + time.sleep(5) + raise RuntimeError("controller manager not Ready within 10 min") + + +def _apply_calico_operator(ex: RemoteExecutor) -> None: + print("applying Tigera operator (server side)…") + operator_url = "https://raw.githubusercontent.com/projectcalico/calico/v3.27.4/manifests/tigera-operator.yaml" + rc, _, err = ex.exec(f"kubectl apply --server-side --force-conflicts -f {operator_url}") + if rc != 0: + raise RuntimeError(f"Applying tigera‑operator failed:\n{err.strip()}") + + # Wait until CRD shows up + for _ in range(60): + ok, _, _ = ex.exec("kubectl get crd installations.operator.tigera.io >/dev/null 2>&1") + if ok == 0: + break + time.sleep(2) + else: + raise RuntimeError("Installation CRD never appeared") + + print("applying Calico custom resources…") + cr_url = "https://raw.githubusercontent.com/projectcalico/calico/v3.27.4/manifests/custom-resources.yaml" + rc, _, err = ex.exec(f"kubectl apply -f {cr_url}") + if rc != 0: + raise RuntimeError(f"Applying custom-resources failed:\n{err.strip()}") + + print(" Calico install initiated…") + + +def init_master(ex: RemoteExecutor, cidr: str) -> str: + print(f" ->cleaning previous state on {ex.host}…") + for cmd in [ + "sudo kubeadm reset -f >/dev/null 2>&1 || true", + "sudo rm -rf /etc/kubernetes/pki || true", + "sudo rm -rf /etc/kubernetes/manifests/*.yaml /var/lib/etcd/* || true", + "sudo rm -rf /etc/cni/net.d/* || true", + "sudo systemctl restart containerd", + "sudo systemctl restart kubelet", + ]: + ex.exec(cmd) + + print(f" -> running kubeadm init on {ex.host} …") + rc, out, err = ex.exec(f"sudo kubeadm init --pod-network-cidr={cidr} --upload-certs --v=5") + if rc != 0: + raise RuntimeError(f"[{ex.host}] kubeadm init failed:\n{err.strip()}") + + ex.exec("mkdir -p $HOME/.kube") + ex.exec("sudo cp /etc/kubernetes/admin.conf $HOME/.kube/config") + ex.exec("sudo chown $(id -u):$(id -g) $HOME/.kube/config") + + _wait_for_api_server(ex) + _wait_controller_ready(ex) + _apply_calico_operator(ex) + + print(" generating join command…") + for _ in range(30): + rc, join_cmd, _ = ex.exec("sudo kubeadm token create --print-join-command --ttl 24h") + if rc == 0 and join_cmd.strip(): + return join_cmd.strip() + time.sleep(10) + for line in out.splitlines(): + if 
line.strip().startswith("kubeadm join"): + return " ".join(part.rstrip("\\") for part in line.split()) + raise RuntimeError("timed‑out fetching join command") + + +def join_worker(ex: RemoteExecutor, join_cmd: str) -> None: + print(f" ↳ preparing worker {ex.host}…") + for cmd in [ + "sudo kubeadm reset -f >/dev/null 2>&1 || true", + "sudo rm -rf /etc/kubernetes/pki || true", + "sudo rm -rf /etc/kubernetes/manifests/*.yaml /var/lib/etcd/* /var/lib/kubelet/* || true", + "sudo rm -rf /etc/cni/net.d/* || true", + "sudo systemctl restart kubelet", + ]: + ex.exec(cmd) + + print(f" ↳ joining {ex.host} to cluster…") + rc, _, err = ex.exec(f"sudo {join_cmd}") + if rc != 0: + raise RuntimeError(f"[{ex.host}] kubeadm join failed:\n{err.strip()}") + + +def setup_cloudlab_cluster(cfg: dict) -> None: + cloud, cidr = cfg["cloudlab"], cfg["pod_network_cidr"] + executors: list[RemoteExecutor] = [] + try: + for host in cloud["nodes"]: + print(f"Installing K8s components on {host} …") + ex = RemoteExecutor(host, cloud["ssh_user"], cloud.get("ssh_key")) + install_k8s_components(ex) + executors.append(ex) + + print("\nInitializing control plane…") + join_cmd = init_master(executors[0], cidr) + print("✓ Control plane is Ready!") + + if len(executors) > 1: + print(f"\nJoining {len(executors)-1} workers…") + for ex in executors[1:]: + join_worker(ex, join_cmd) + + # health check + print("\nPerforming cluster health check…") + time.sleep(10) + rc, nodes_out, _ = executors[0].exec("kubectl get nodes --no-headers") + if rc == 0: + print("\n🟢 Cluster is up:") + print(nodes_out) + else: + print("⚠️ Unable to list nodes — check manually.") + finally: + for ex in executors: + ex.close() + + +def setup_kind_cluster(cfg: dict) -> None: + print("CloudLab unreachable — creating local Kind cluster…") + kind_cfg = cfg["kind"]["kind_config_arm"] # adjust arch detection if needed + subprocess.run(["kind", "create", "cluster", "--config", kind_cfg], check=True) + print("Kind cluster ready ") + + +def deploy_sregym(ex: RemoteExecutor, deploy_key_path: str) -> None: + """Deploy SREGym with proper SSH key handling and host verification""" + print("Setting up SREGym deployment…") + + # Read the private key content from local file + try: + with open(deploy_key_path, "r") as f: + private_key_content = f.read() + except FileNotFoundError: + raise RuntimeError(f"Deploy key not found: {deploy_key_path}") + + # Create the private key on the remote server + setup_cmds = [ + "mkdir -p ~/.ssh", + "chmod 700 ~/.ssh", + # Write the private key to remote server + f"cat > ~/.ssh/sregym_deploy << 'EOF'\n{private_key_content}\nEOF", + "chmod 600 ~/.ssh/sregym_deploy", + # Add GitHub to known_hosts to avoid host key verification + "ssh-keyscan -H github.com >> ~/.ssh/known_hosts 2>/dev/null || true", + ] + + for cmd in setup_cmds: + print(f" Setting up SSH: {cmd[:50]}...") + rc, stdout, stderr = ex.exec(cmd) + if rc != 0: + print(f" Setup failed: {stderr.strip()}") + raise RuntimeError(f"SSH setup failed: {stderr.strip()}") + + # Clone and deploy SREGym + deploy_cmds = [ + # Use the correct repository URL + "ssh-agent bash -c 'ssh-add ~/.ssh/sregym_deploy; git clone --recurse-submodules git@github.com:SREGym/SREGym.git /tmp/sregym'", + "cd /tmp/sregym", + # Clean up the private key for security + "rm -f ~/.ssh/sregym_deploy", + ] + + for cmd in deploy_cmds: + print(f" Running: {cmd[:60]}...") + rc, stdout, stderr = ex.exec(cmd) + if rc != 0: + print(f" Failed command: {cmd}") + print(f" Error: {stderr.strip()}") + raise RuntimeError(f"[{ex.host}] 
`{cmd}` failed:\n{stderr.strip()}") + + print("✅ SREGym deployed successfully!") + + +def setup_cloudlab_cluster_with_sregym(cfg: dict) -> None: + cloud, cidr = cfg["cloudlab"], cfg["pod_network_cidr"] + deploy_key = cfg["deploy_key"] + executors: list[RemoteExecutor] = [] + try: + for host in cloud["nodes"]: + print(f"Installing K8s components on {host} …") + ex = RemoteExecutor(host, cloud["ssh_user"], cloud.get("ssh_key")) + install_k8s_components(ex) + executors.append(ex) + + print("\nInitializing control plane…") + join_cmd = init_master(executors[0], cidr) + print("✓ Control plane is Ready!") + + if len(executors) > 1: + print(f"\nJoining {len(executors)-1} workers…") + for ex in executors[1:]: + join_worker(ex, join_cmd) + + # Deploy SREGym + print("\nDeploying SREGym…") + deploy_sregym(executors[0], deploy_key) + + # Health check + print("\nPerforming cluster health check…") + time.sleep(10) + rc, nodes_out, _ = executors[0].exec("kubectl get nodes --no-headers") + if rc == 0: + print("\n🟢 Cluster is up:") + print(nodes_out) + else: + print("⚠️ Unable to list nodes — check manually.") + finally: + for ex in executors: + ex.close() + + +def main() -> None: + cfg = load_cfg() + try: + if cfg["mode"] == "cloudlab" and nodes_reachable(cfg["cloudlab"]): + setup_cloudlab_cluster(cfg) + else: + setup_kind_cluster(cfg) + except RuntimeError as exc: + print(f"\n X {exc}", file=sys.stderr) + sys.exit(1) + except KeyboardInterrupt: + print("\n⚠️ Setup interrupted by user", file=sys.stderr) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/scripts/geni_lib/generate_rspec.py b/scripts/geni_lib/generate_rspec.py new file mode 100644 index 0000000..c0c1f49 --- /dev/null +++ b/scripts/geni_lib/generate_rspec.py @@ -0,0 +1,35 @@ +import geni.portal as portal + +RSPEC_FILE = "rspecs/test.xml" + +# Create a Request object to start building the RSpec +request = portal.context.makeRequestRSpec() + +# Create the raw "PC" nodes +node1 = request.RawPC("control") +node2 = request.RawPC("compute1") +node3 = request.RawPC("compute2") + +# Set the hardware type +node1.hardware_type = "c220g5" +node2.hardware_type = "c220g5" +node3.hardware_type = "c220g5" + +# Set the disk image +node1.disk_image = "urn:publicid:IDN+emulab.net+image+emulab-ops//UBUNTU22-64-STD" +node2.disk_image = "urn:publicid:IDN+emulab.net+image+emulab-ops//UBUNTU22-64-STD" +node3.disk_image = "urn:publicid:IDN+emulab.net+image+emulab-ops//UBUNTU22-64-STD" + +# node1.routable_control_ip = True +# node2.routable_control_ip = True +# node3.routable_control_ip = True + +# Create a link between the two nodes +link1 = request.Link(members=[node1, node2, node3]) + +# Print the RSpec to the console +portal.context.printRequestRSpec() + +# Save the RSpec to a file +with open(RSPEC_FILE, "w") as f: + f.write(request.toXMLString(pretty_print=True, ucode=True)) diff --git a/scripts/geni_lib/genictl.py b/scripts/geni_lib/genictl.py new file mode 100644 index 0000000..ec3b0ce --- /dev/null +++ b/scripts/geni_lib/genictl.py @@ -0,0 +1,606 @@ +import datetime +import json +import random +import re +import sys +import time +import warnings + +import click +import geni.portal as portal +import geni.util +from cluster_setup import setup_cloudlab_cluster, setup_cloudlab_cluster_with_sregym + +from provisioner.config.settings import AGGREGATES_MAP +from provisioner.utils.parser import collect_and_parse_hardware_info, parse_sliver_info + +warnings.filterwarnings("ignore") + +# List of available OS types +OS_TYPES = [ + "UBUNTU22-64-STD", + 
"UBUNTU20-64-STD", + "UBUNTU18-64-STD", + "UBUNTU16-64-STD", + "DEBIAN11-64-STD", + "DEBIAN10-64-STD", + "FEDORA36-64-STD", + "CENTOS7-64-STD", + "CENTOS8-64-STD", + "RHEL8-64-STD", +] + + +def validate_hours(ctx, param, value): + float_value = float(value) + if float_value <= 0: + raise click.BadParameter("Hours must be greater than 0") + return float_value + + +def create_slice(context, slice_name, hours, description): + try: + print(f"Creating slice '{slice_name}'...") + expiration = datetime.datetime.now() + datetime.timedelta(hours=hours) + res = context.cf.createSlice(context, slice_name, exp=expiration, desc=description) + print(f"Slice Info: \n{json.dumps(res, indent=2)}") + print(f"Slice '{slice_name}' created") + except Exception as e: + print(f"Error: {e}") + + +def create_sliver(context, slice_name, rspec_file, site): + try: + print(f"Creating sliver in slice '{slice_name}'...") + aggregate = get_aggregate(site) + igm = aggregate.createsliver(context, slice_name, rspec_file) + geni.util.printlogininfo(manifest=igm) + + # Save the login info to a file + login_info = geni.util._corelogininfo(igm) + if isinstance(login_info, list): + login_info = "\n".join(map(str, login_info)) + with open(f"{slice_name}.login.info.txt", "w") as f: + f.write(f"Slice name: {slice_name}\n") + f.write(f"Cluster name: {aggregate.name}\n") + f.write(login_info) + + print(f"Sliver '{slice_name}' created") + except Exception as e: + print(f"Error: {e}") + + +def get_sliver_status(context, slice_name, site): + try: + print("Checking sliver status...") + aggregate = get_aggregate(site) + status = aggregate.sliverstatus(context, slice_name) + print(f"Status: {json.dumps(status, indent=2)}") + except Exception as e: + print(f"Error: {e}") + + +def renew_slice(context, slice_name, hours): + try: + print("Renewing slice...") + new_expiration = datetime.datetime.now() + datetime.timedelta(hours=hours) + context.cf.renewSlice(context, slice_name, new_expiration) + print(f"Slice '{slice_name}' renewed") + except Exception as e: + print(f"Error: {e}") + + +def renew_sliver(context, slice_name, hours, site): + try: + print("Renewing sliver...") + aggregate = get_aggregate(site) + new_expiration = datetime.datetime.now() + datetime.timedelta(hours=hours) + aggregate.renewsliver(context, slice_name, new_expiration) + print(f"Sliver '{slice_name}' renewed") + except Exception as e: + print(f"Error: {e}") + + +def list_slices(context): + try: + print("Listing slices...") + res = context.cf.listSlices(context) + print(json.dumps(res, indent=2)) + except Exception as e: + print(f"Error: {e}") + + +def list_sliver_spec(context, slice_name, site): + try: + print("Listing slivers...") + aggregate = get_aggregate(site) + res = aggregate.listresources(context, slice_name, available=True) + + # Parse and display the information + sliver_info = parse_sliver_info(res.text) + + print("\nExperiment Information:") + print(f"Description: {sliver_info['description']}") + print(f"Expiration: {sliver_info['expiration']}") + + print("\nNodes:") + for node in sliver_info["nodes"]: + print(f"\nNode: {node['client_id']}") + print(f" Hostname: {node['hostname']}") + print(f" Public IP: {node['public_ip']}") + print(f" Internal IP: {node['internal_ip']}") + print(f" Hardware: {node['hardware']}") + print(f" OS Image: {node['os_image']}") + + print("\nLocation:") + print(f" Country: {sliver_info['location']['country']}") + print(f" Latitude: {sliver_info['location']['latitude']}") + print(f" Longitude: {sliver_info['location']['longitude']}") 
+ except Exception as e: + print(f"Error: {e}") + + +def delete_sliver(context, slice_name, site): + try: + print(f"Deleting sliver '{slice_name}'...") + aggregate = get_aggregate(site) + aggregate.deletesliver(context, slice_name) + print(f"Sliver '{slice_name}' deleted.") + except Exception as e: + print(f"Error: {e}") + + +def get_aggregate(site): + return AGGREGATES_MAP.get(site.lower()) + + +def get_hardware_info(): + hardware_info_list = collect_and_parse_hardware_info() + if hardware_info_list: + print(f"\n{'Hardware Name':<20} | {'Cluster Name':<30} | {'Total':<7} | {'Free':<7}") + print("-" * 100) + + for item in hardware_info_list: + if item["total"] > 0 or item["free"] > 0: + print( + f"{item['hardware_name']:<20} | {item['cluster_name']:<30} | {item['total']:<7} | {item['free']:<7}" + ) + else: + print("No hardware information available") + + +# Gives error when hours too high -> Error: expiration increment is greater then the maximum number (7200) of minutes +def renew_experiment(context, slice_name, site, hours): + new_slice_expiration = datetime.datetime.now() + datetime.timedelta(hours=(hours + 1)) + new_sliver_expiration = datetime.datetime.now() + datetime.timedelta(hours=hours) + try: + print(f"Renewing slice: {slice_name}") + context.cf.renewSlice(context, slice_name, new_slice_expiration) + print(f"Slice '{slice_name}' renewed") + except Exception as e: + if "Cannot shorten slice lifetime" in str(e): + print(f"Slice already has sufficient lifetime") + else: + print(f"Error: {e}") + return + + try: + aggregate = get_aggregate(site) + + print(f"Renewing sliver: {slice_name}") + aggregate.renewsliver(context, slice_name, new_sliver_expiration) + print(f"Sliver '{slice_name}' renewed") + + print(f"Your experiment under slice: {slice_name} is successfully renewed for {hours} hours\n") + except Exception as e: + print(f"Error: {e}") + + +# Kubernetes bootstrapper + + +def _host_list_from_logininfo(logininfo) -> list[str]: + """ + Extract hostnames from GENI login info. + + Input format examples: + - "[node0][saleha] c220g5-110426.wisc.cloudlab.us: 22" + - Raw tuples: (node_name, user, hostname, port) + + Returns list[str] of hostnames. + """ + hosts: list[str] = [] + + for item in logininfo: + # Case 1: Raw tuple format (node_name, user, hostname, port) + if isinstance(item, (tuple, list)) and len(item) >= 3: + # The hostname is at index 2 in the tuple format + hostname = item[2] + if hostname and isinstance(hostname, str) and "." in hostname: + hosts.append(hostname) + continue + + # Case 2: String format "[nodeX][user] hostname: port" + if isinstance(item, str): + # Pattern to match: ] hostname: or ] hostname (space before colon) + # This will capture the hostname between the last ] and either : or end of string + pattern = r"\]\s*([^\s\[\]:]+\.(?:wisc\.cloudlab\.us|utah\.cloudlab\.us|clemson\.cloudlab\.us|[a-z0-9.-]+))(?:\s*:|$)" + match = re.search(pattern, item) + if match: + hosts.append(match.group(1)) + continue + + # Fallback pattern for any hostname-like string after ] + pattern = r"\]\s*([a-zA-Z0-9.-]+\.[a-zA-Z0-9.-]+)" + match = re.search(pattern, item) + if match: + hostname = match.group(1) + # Make sure it's not just the username + if "." 
in hostname and hostname != "saleha": + hosts.append(hostname) + continue + + # Remove duplicates while preserving order + unique_hosts = [] + for host in hosts: + if host not in unique_hosts: + unique_hosts.append(host) + + return unique_hosts + + +def are_nodes_ready(context, slice_name: str, aggregate_name: str) -> bool: + try: + aggregate = get_aggregate(aggregate_name) + sliver_status = aggregate.sliverstatus(context, slice_name) + resources = sliver_status.get("geni_resources", []) + return all(resource.get("pg_status") == "ready" for resource in resources) + except Exception as e: + print(f"Error: {e}") + raise e + + +def create_experiment( + context, + hardware_type, + nodes, + duration, + os_type, + k8s, + ssh_user, + ssh_key, + pod_network_cidr, + deploy_sregym, + deploy_key, +): + hardware_info_list = collect_and_parse_hardware_info() + cluster_name = None + + for item in hardware_info_list: + if item["hardware_name"].strip() == hardware_type.strip(): + if item["total"] >= nodes and item["free"] >= nodes: + print(f"Creating a {nodes} node cluster of {hardware_type} at {item['cluster_name']}") + cluster_name = item["cluster_name"] + break + else: + print(f"Not enough {hardware_type} nodes available at {item['cluster_name']}") + + if cluster_name is None: + print(f"No {hardware_type} nodes available") + return + + print(f"{hardware_type} is available at {cluster_name}\n") + aggregate_name = cluster_name.replace("Cloudlab ", "").lower() + aggregate = get_aggregate(aggregate_name) + + slice_name = f"exp-{random.randint(100000,999999)}" + expires = datetime.datetime.now() + datetime.timedelta(hours=duration) + + # Build simple RSpec + req = portal.context.makeRequestRSpec() + pcs = [] + for i in range(nodes): + n = req.RawPC(f"node{i}") + n.hardware_type = hardware_type + n.disk_image = f"urn:publicid:IDN+emulab.net+image+emulab-ops//" f"{os_type}" + n.routable_control_ip = True + pcs.append(n) + req.Link(members=pcs) + + print(f"🔧 Creating slice {slice_name} …") + context.cf.createSlice(context, slice_name, exp=expires, desc="Quick experiment via genictl") + + print(f"🚜 Allocating sliver on {aggregate_name} …") + manifest = aggregate.createsliver(context, slice_name, req) + + geni.util.printlogininfo(manifest=manifest) + + # save the manifest to a file + login_info = geni.util._corelogininfo(manifest) + with open(f"{slice_name}.experiment.info.json", "w") as f: + f.write( + json.dumps( + { + "slice_name": slice_name, + "aggregate_name": aggregate_name, + "duration": duration, + "hardware_type": hardware_type, + "nodes": nodes, + "os_type": os_type, + "k8s": k8s, + "deploy_sregym": deploy_sregym, + "deploy_key": deploy_key, + "pod_network_cidr": pod_network_cidr, + "created_at": datetime.datetime.now().isoformat(), + "login_info": login_info, + }, + indent=2, + ) + ) + + if not k8s: + return # user didn't ask for Kubernetes + + # ── Kubernetes path ─────────────────────────────────────────────────── + print("\n⚙️ --k8s flag detected → bootstrapping Kubernetes once nodes are reachable") + + logininfo = geni.util._corelogininfo(manifest) + hosts = _host_list_from_logininfo(logininfo) + + print(f"🔍 Debug: Raw logininfo: {logininfo}") + print(f"🔍 Debug: Extracted hosts: {hosts}") + + if not hosts: + sys.exit("❌ Couldn't parse node hostnames from login info") + + # Validate that we got actual hostnames, not usernames + valid_hosts = [] + for host in hosts: + if "." 
in host and not host == ssh_user: + valid_hosts.append(host) + else: + print(f"⚠️ Skipping invalid hostname: {host}") + + if not valid_hosts: + print("❌ No valid hostnames found! Raw login info:") + for item in logininfo: + print(f" {item}") + sys.exit("Cannot proceed without valid hostnames") + + hosts = valid_hosts + print(f"✅ Using hosts: {hosts}") + + cfg = { + "cloudlab": { + "ssh_user": ssh_user, + "ssh_key": ssh_key, + "nodes": hosts, + }, + "pod_network_cidr": pod_network_cidr, + "deploy_sregym": deploy_sregym, + "deploy_key": deploy_key, + } + + print(f"🔍 Debug: Config: \n{json.dumps(cfg, indent=2)}") + + print("⌛ Waiting (≤20 min) for nodes to get ready …") + t0 = time.time() + check_count = 0 + while time.time() - t0 < 1200: # 20 minutes + elapsed = time.time() - t0 + check_count += 1 + + try: + if are_nodes_ready(context, slice_name, aggregate_name): + print(f"✅ All nodes ready after {elapsed:.1f}s!") + break + except Exception as e: + print(f"⚠️ Error checking node reachability (attempt {check_count}): {e}") + + # Print status every minute + if check_count == 1 or elapsed % 60 < 30: # First check or every ~minute + print(f" Still waiting... {elapsed:.0f}s elapsed, checking {len(hosts)} hosts") + + time.sleep(10) # Check every 10 seconds + else: + print("⚠️ Nodes not reachable after 20 min – skipping K8s bootstrap") + print(" You can try running the following manually once nodes are ready:") + print(f" ssh {ssh_user}@{hosts[0]}") + return + + print("🚀 Running cluster_setup …") + try: + if deploy_sregym: + setup_cloudlab_cluster_with_sregym(cfg) + else: + setup_cloudlab_cluster(cfg) + print("✅ Kubernetes cluster ready!") + except Exception as e: + print(f"❌ Cluster setup failed: {e}") + print(" Nodes are reachable but Kubernetes setup encountered an error.") + + +# Define Click command group +@click.group() +def cli(): + """GENI CloudLab Experiment Management Tool""" + pass + + +# Create slice command +@cli.command("create-slice") +@click.argument("slice_name") +@click.option("--hours", type=float, default=1, callback=validate_hours, help="Hours until expiration") +@click.option("--description", default="CloudLab experiment", help="Slice description") +def cmd_create_slice(slice_name, hours, description): + """Create a new slice""" + context = geni.util.loadContext() + create_slice(context, slice_name, hours, description) + + +# Create sliver command +@cli.command("create-sliver") +@click.argument("slice_name") +@click.argument("rspec_file") +@click.option( + "--site", + type=click.Choice(["utah", "clemson", "wisconsin"], case_sensitive=False), + required=True, + help="CloudLab site", +) +def cmd_create_sliver(slice_name, rspec_file, site): + """Create a new sliver""" + context = geni.util.loadContext() + create_sliver(context, slice_name, rspec_file, site) + + +# Sliver status command +@cli.command("sliver-status") +@click.argument("slice_name") +@click.option( + "--site", + type=click.Choice(["utah", "clemson", "wisconsin"], case_sensitive=False), + required=True, + help="CloudLab site", +) +def cmd_sliver_status(slice_name, site): + """Get sliver status""" + context = geni.util.loadContext() + get_sliver_status(context, slice_name, site) + + +# Renew slice command +@cli.command("renew-slice") +@click.argument("slice_name") +@click.option("--hours", type=float, default=1, callback=validate_hours, help="Hours to extend") +def cmd_renew_slice(slice_name, hours): + """Renew a slice""" + context = geni.util.loadContext() + renew_slice(context, slice_name, hours) + + +# Renew 
sliver command +@cli.command("renew-sliver") +@click.argument("slice_name") +@click.option("--hours", type=float, default=1, callback=validate_hours, help="Hours to extend") +@click.option( + "--site", + type=click.Choice(["utah", "clemson", "wisconsin"], case_sensitive=False), + required=True, + help="CloudLab site", +) +def cmd_renew_sliver(slice_name, hours, site): + """Renew a sliver""" + context = geni.util.loadContext() + renew_sliver(context, slice_name, hours, site) + + +# List slices command +@cli.command("list-slices") +def cmd_list_slices(): + """List all slices""" + context = geni.util.loadContext() + list_slices(context) + + +# List sliver specifications command +@cli.command("sliver-spec") +@click.argument("slice_name") +@click.option( + "--site", + type=click.Choice(["utah", "clemson", "wisconsin"], case_sensitive=False), + required=True, + help="CloudLab site", +) +def cmd_sliver_spec(slice_name, site): + """List sliver specifications""" + context = geni.util.loadContext() + list_sliver_spec(context, slice_name, site) + + +# Delete sliver command +@cli.command("delete-sliver") +@click.argument("slice_name") +@click.option( + "--site", + type=click.Choice(["utah", "clemson", "wisconsin"], case_sensitive=False), + required=True, + help="CloudLab site", +) +def cmd_delete_sliver(slice_name, site): + """Delete a sliver""" + context = geni.util.loadContext() + delete_sliver(context, slice_name, site) + + +# Get hardware info command +@cli.command("get-hardware-info") +def cmd_get_hardware_info(): + """Get available hardware information from CloudLab""" + get_hardware_info() + + +# Create experiment command +# @cli.command("create-experiment") +# @click.option("--hardware-type", required=True, help="Hardware type for the nodes") +# @click.option("--duration", type=float, default=1, callback=validate_hours, help="Duration in hours") +# @click.option("--node-count", type=int, default=3, help="Number of nodes to create (default: 3)") +# @click.option( +# "--os-type", type=click.Choice(OS_TYPES), default="UBUNTU22-64-STD", help="OS image (default: UBUNTU22-64-STD)" +# ) +# def cmd_create_experiment(hardware_type, duration, node_count, os_type): +# """Create a 3-node experiment with specified hardware type""" +# context = geni.util.loadContext() +# create_experiment(context, hardware_type, duration, node_count, os_type) + + +# Renew experiment command +@cli.command("renew-experiment") +@click.argument("slice_name") +@click.option( + "--site", + type=click.Choice(["utah", "clemson", "wisconsin"], case_sensitive=False), + required=True, + help="CloudLab site", +) +@click.option("--hours", type=float, default=1, callback=validate_hours, help="Hours to extend") +def cmd_renew_experiment(slice_name, site, hours): + """Renew both slice and sliver for an experiment""" + context = geni.util.loadContext() + renew_experiment(context, slice_name, site, hours) + + +# Create experiment command +@cli.command("create-experiment") +@click.option("--hardware-type", default="c220g5", help="Hardware type") +@click.option("--nodes", type=int, default=3, help="Number of nodes") +@click.option("--duration", type=int, default=1, help="Duration in hours") +@click.option("--os-type", default="UBUNTU22-64-STD", help="OS image") +@click.option("--ssh-user", help="SSH username") +@click.option("--ssh-key", help="SSH privatekey file") +@click.option("--k8s", is_flag=True, help="Bootstrap Kubernetes after sliver is ready") +@click.option("--pod-network-cidr", default="192.168.0.0/16", help="Calico pod CIDR 
(default 192.168.0.0/16)") +@click.option("--deploy-sregym", is_flag=True, help="Deploy SREGym after K8s cluster is ready") +@click.option("--deploy-key", help="Path to SSH deploy key for SREGym private repo") +def cmd_create_experiment( + hardware_type, nodes, duration, os_type, k8s, ssh_user, ssh_key, pod_network_cidr, deploy_sregym, deploy_key +): + """Create slice + sliver quickly""" + context = geni.util.loadContext() + create_experiment( + context, + hardware_type, + nodes, + duration, + os_type, + k8s, + ssh_user, + ssh_key, + pod_network_cidr, + deploy_sregym, + deploy_key, + ) + + +if __name__ == "__main__": + cli() diff --git a/scripts/geni_lib/mod/geni_lib_xlab-1.0.0.tar.gz b/scripts/geni_lib/mod/geni_lib_xlab-1.0.0.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..cd48f55ebaf7321fd49ca588ceee2c934b927b2e GIT binary patch literal 104018 zcmV)FK)=5qiwFpYU^r+3|7T@xXJNow=b?w+R4 zOj;zXP$(1%1)xwUd&yq<>#6hjv*);8@|RcnTV{Wg{#{;PTT{RB{mSy{%F17?$G?1r zzdX&H1e*WLxB0iaWsNd_& z&P{j4UbdH)USs}mZEfJcD_a}O`oD6{uWT%@Y%XsuuP$%F{9oPLg!#YmIp=@uJHfPa zv%Y_&fA*Ik|5rE5@_((C|DQ$vuWIst{l|@!jV*h*7X9C}{69R{-9J9xf6@8Bxw$?; z{;w}@!2Cz~zp=HsvAzlG#L7A>27j@ZKj-}aM&v0__$-oOw23~Hr~uJMPTp+Ivt3bVw&$+IYFHZ1rXc}|#Gp3}dz z{LuC9eK&UkD@v@u8^RAS%=~O@Ww%ad^%Kv@ytHHW1rRT_vWUQBw;ubB7)OH+LWmv! z@@{nRC7x@YM}zFaNjw`dSv4^cUDYunfYIp24+TtmC2L_H@nr;WlhkVY1Ir1=Z7WYb zm%!YNEyvOuaYC2%BX*L^@8^M&=-24lzjj0M985Ww=*{^PBG<7d`Rhd%5- zdi^|eZUPP-@J(}ouC+@*@MDLlbriY&!0$Vf*v0a^EVM-o)OS(_!VmjF?)u@-ilc

zHDMN2d~IA?SUfmsRz9hGG;a~qJomf|4W#}Kh*K0Sc*PjUgZf@;P4>$t_On{K8us7p z=DJZ0n)Cd`&5$KpEIYfup62y; z3H>BV>lfAzm`xTvC>AFe$jN3YQ@_Dvx}Jt%RrIi#%^Z5_avPW+o*j7io!b86wvaSL zl4zzC5EJjds)O%v8)+vR+*!r5g%f-9IxM`$z=>l4S`8Hj=A-PPc2QmbAEo>AF(l>F z$w?5npCGB=D=)!;UI29}5-weljeKM#`!pqW?WdsfKZqJ6m8qR)K)ajgGsYEtLOddX zdD&ktZ4Y??`$rFDSpBYoU+010#0q_O63?$0li9;h-Z1O60)1a?gHWl@TpkZobARsd zBc1c=XYG}s;`m9pDq+;pA1I-C1L*51E176~YiFA)Y_WZPN4Vuv52&Z7r?Ra=T;U=l zi9xKpBhw0W=BCEWPI(Lji=`Oy0`G=heYFx8xi0sbB{L(Xoi;cPoAgg90TE?fU+x3k zRr^rsuk6xrRurZ|$d3ndgN}3!FDZfrpH~xOi6Ri}Hju-qG z(J;u!pYFL^&jZ?bFF871Gkc#i4R+1fVpWLfSg9m)s~%0FwEe{nBpqITUIdKD?IZfT zC7_pykC5dSbcxRdrnM|!B1fKSUi<_6P;Pmsu4KP${X|XWzA3og2@QwyuQldqzjNsy zCh{e>>Cs{j;QIwT9>sSr8j*DH&v&4L%oF{PRr@Dvz{c((LHmYxmZ(L*^d8mZE!090 z1Y;PKA7j!CS2fiPZPOz)63*Xuvq`sEl+H+7FfCE_Y&?X$aJIhA(E!^aV^AP6Z+CIu?6(d87x3wGJQH!RJW+T2~swrTm?haA1Gmkk$aL_ zz*g35Ue5;HGi`kg%wEnHC1*FbBT`t>VB3d@v(^;HXFch+hh(;IMYRz;p{el^&q849 zTI#QhHE!`ZwQiOiUXKa;{}M3Z|6;vC25dwO%fO_kG%sapYbHnCCPsy5{(u21GMD|y zi^AWjyoqY&1q?EP$_YT@5J-yyq92O0;vu~~!JUbTqw$@SlkriZ%3W8b~(-qGfogkyOe3O zvQ?w+CZ6Q}!jjjr_Jo0)oh-AOCEb;zg(|DenU8U3`3T8KQfzBI$S8Zl>#LHa^a!wu!(*=Wqs=3ZGG_E5-fibLX*gUgqF zzexI|`6TCU2F(<>(jEHbC}rLfA7(?r*iFNAG=Dxks@7^+Iue1{i+Zb@tCJ8*mA)os@??Ja=PX)Dn6B-quLD|2a7 zG{w>6h_tItYJcGghYo))?YO3!R^9qC=4L025tf0uBNwGD*<{YW#28VE4@6(j;z*2G zJMBUEB1RLn!viTb9nUkf92c1*j6a%CuG0<{mC9PXWf0S6R0rp zfd`JlQ98cXtI?X&T@%$iyo^3us!?3)wSEf6A||fz+=wI*@ftN3pnHoX&x`)d$1oFa*8Ibh z-M6t}`^$=HHtbix6~9?pL0o|;b7?9A?e>*1%Eo^MJIHU$UQm@DdlT{frlWd!Xuiat zYYlJ-ol6;M8#(faG6ywm-}9H>7~8RqZXT^rj7wklZ~CeZxJ<~#6jQk8EiX>R5|h7t zS>&026}ks2w*L>w7y&Dy#_OT{J~HZ#%*K1{tgtJ$!+rp%Yp9>TZf@Rziz4l80C+BM z1RW6-W`BVv=NpHOR|8p%4rNRf-+;xZNzuoyivOWK$mC{q?9Rp@i~1XUO>w&o0z-6c z{T{8AU%!dt6hWkq$yMe?(bAjjGhag)DN2>0q**@WdVV@c_!&@(` zS*5zX(>?a-30Qx>asD(lx7U6ho2a@pbvu}G;vkoK;jZW_X)0Gew{EJe+OwpZ`~@E4 z+?R{#-<2w5g&w@&E-r3G2Mwx;L9;bO1i^>OPN^0gQf_L-vfO>Csi<;geFBE6B|Xrl zp0#7171Y#aK3Wn@(9ZGmtx_ z7DK*cWRdcs$;uX)=i(K|h}D8yFl)SwXnAM&Ec{~(eTB0=xB*9UBV@hzJ?}Pt_Y2gT z{o(genrA!A-$qeWkv~QT*8Dl0AQ^T%pGS>8Prfm2aiioUjq2z$G@|^qg9y`ObL?Sq zUB53CQ!7+mB7T7HKj;)}f4a4)ZO^xTV|<%`0hI%?@Bi<(TmC=v2&4x@hj<*oD=vT! 
zrvQybX<+j|91rH5q~k-V{sIsj18Kzoa20t3$n`(=`TQtRzB>-(b**!ES^iW9CxpU( zcW>%!Nzb^UG8EP5gnL7aOHX;B^+u<-?6byLn@T$&19&z$dE@bZxWO9k*3uIOS_tC$ z*?x9^-wV75xC|pOKP-1e%1dEV-yb577$#ijiHb_+e(2q<%MIJp=Z@XUcULxrOp*0t z7cvX{gCM*(8;7myzdmaslmjEnO)3+52aT*uBXN$pFv2@afv)N?q{*0qJAfF`Ql`B= z0-3JlBc_(cmj2DpS{-^C&JE75+!EsD$E4p?!0#WpPs$RpZ$q<{2LEVRrVJ31)j$L2=?QIR;KNf_IRv__VN z#k2gKPt{__b1Pw*WO|9e0rfqB0^22ry*gE~IN=&4R|~No7Ar?e=ln^UJl$9<`y6$x zG)b$Z1O}2za%q}4x_vEcFMN&-EdB6CjRlfA0)H;(HyorJkl3o*26z7<{q;b@xz(xS z%c3VWnMVys9FC%f+}2_D7wj3IkKcg2Gr%5N6;44sM%nK)tk`6nsV!Q%SfqZxJ>&}c zNPERJiBfcR<}`g&D>>wQd?Y8|ydXhpdWp%Emw@)dq<;M6N*CYsJfBo9C9e9WE>$bv zIrPT(@US$PKPfrNkqoQ1+bg)tzA;S;jJW2z7m~qhv$5j6sW6 z`BRWOHAhP>R|}h49(^soPuFE)D=5{77!U>8U`k=VaCYR8^e~z-cG!IVhoM za_K$I0;sh4$CUN}j9~LG{HLkfj5?l-c+AKP@65W$`?_^qRwwj9;?7BwgWuhHUPR=V zoNb#__U08{FF!`Tek^(L%JTKPd)?bPeE;Shkxe44!r_7vFKuV(sNg-aEKu6S_r0}R zv0YANdGyCGI-k$E_L|Qk%|vsp)`Urj#s~J-%DHdR>Vts}`C zC_^tjcETb%$%^pFe^TakE8e3yfuD|&-;qrKky-MG+QeTd8vE$99g*SBHds6Mg#lX=fN1Poeej)x=Iz}uC@2^fAt z^J_~VrbE`%(q!{Fo$bzaX#xF2|Dg$Ln?krUOmW#oezzY+)U(>7bR)GG`b+o}-8vM) z$e_3ei~NCG@1JwD+!idKyQb;Nu!>*HH@?n-ivmon+<>#4t=;a6iGIQM_s;FtJ0SSZ z32?96`R1We!ukt;$7Hu`Hr?v=qm5hsFBC3;v~~hZvIN#)(y@T%>T3Nuph4a8^|JiK z8|39}k?^pSY3W~b`}S$X(BGX6iU-(x)ZZ-*NtD2QBst|%JQY*ugh>@kWWiL4H#=)aINThSpSbb3_wqx4J_up(*(b z?j$*Rn?y_po_J(xXX7)RTM!^K#+1$SnV;&GW$fyQZZc|T5&7x)=_=Ytws_VuFBJsQ zbHYNM(6t#=zt5@g6lZZ-(!i9Mv?zX4Ad;~?Y)lyUB#61>m0lZ1d$-`qoF);1xY~}U z?kz-IVs{b09|F!8ej5Q{$8n@!X2q1d4p1hKR;I6yDjDiidzR#9qO-}_r2O#5qx)A1 zH%_&wvBvfc~V#r5Vq=QTByKV{_jst(-%;8-JgJ_T- zDv_Mz(tAgCV`;SJ)S*{)OxnZ(B4uMJ%Y$SUx#c{l&O{`x)rVT9$lg}f{|@>mg++A; z#CLa*#lL@pf1g+mjy;vM;6F09(+}Kqe>}#;fqE~MXp;V?fw=>bI!-#(Ynw~?u07oA z@#k^p7cFJfk|c}G;ADdOB{wdmb=YG0rX>1zf;EJfIyX?Pdtk z1WBsD)6Ft}eQww)9zR~IB=skJKuonbP+OHjM)*!5+TJyW$?*2w5Z zLjVkBsA7gwG#|n zDaTN+puc2bmbZk`rNkwzGAT_y<R`!)a5iN2z4wrH^V(YCIhCM-m*TPmVM zzX(Ezk|;LL3vx+)8e$D&|cm8?gL%IG# zWu_B!8YqU@l-$J&5vL$03sDPE-GfZcAIIj0^%lT+&rvT2b$atb)mN3praAmB9Q$Efr8U`#phKla<+A3D2?|`GIa$D$m{~o_aXW3T z^7ei@g0TqlT%3##T=)w0rxYJtGZh^h20Nw_migNjBOj;1km?R0T-95}(xp3Y-qWi7 zLzdiyy_M0wSQB-e*-Uf2ob+vZKmYCRZ?ZvkSCmE`wq1tTDmFMu+!8_Rj3hTbR<(nou{qb4ktBFz ziJ;zk3_SoHcY~y-wIm+=}$U^J`};(qHUP!>ukC z@3dLwq=vnoU~bRCTQKDn@@jby32RtKCqhInQ7KKyFXXDjeFzGn|HSn3ZFjx_PcIA03&4@lW$b~Gn>!8SSIoA^Ts49g+R zzmCI0A>cPzg_HGjF0j9UAb|(dlawg2#>nH7DZ$0C^^}toQzJrLWTr@2M>D%xVQ;YA z38!-Y-sa_*xf!_8=UhzZlfls$p&Z9KphFpV{aXpq-o=Z8MxBq9_UzgwBvI1BZf;dQ z?N}8N>-YQj?{tNqVo7FP#zC-MuD8u`o9 zpIHQVqWE!~NZUsE;ns5jckt`Ll40 zQ}G>3GANfIz%N?z1*UVV?4$gU0Xcph{`G`Bbi&0OVo8y9* z)^ouR1&f@iex^$k9NuTS8+kJsN#ZtZ{aE-?4P~3Jq(>r;d;h}Q{a)~WEZ%?6bILFE z7K6B4Zp)UfGpRQOB;baM8`%9oUk=7wuixy4B;u{sBN%D&ksPg~BSQF3byn9V%~rX` zHX1a8(Bm=1rvCXx(Iv8(=ZdXj{^1I{z_X6n?KbrGUo}y?4ti`>=Zw{&Jv2IyWTYa$cSy{hPl% zB?h;Aga744OPkX;dB5OG4#pGk^%9>nwMnn^h$fu;66;TJ|_5w zj)BJBS;D?k?|@NT8Hj1ScYgj{^q=g?;nIF30&J25%>{jP>_<4?t%Snku5tJ(Dp{(m zfAV2VEb!a;*@djKA}x%?+MLy#4tQKh{G-31T)Yq%jEH=YPZg|MVYhH`=~ki*sA7ZZaN6JYFN-?UD^L1wTOGWUl>cD? zS{1=b#e_4E?B(Q{BZya2>WQfPBQ9a7IPyv+%C{8Xp~L=|Jbv=A?J=fAw?(fa6~_*0 zVyt(G;(o{#9MH)EDkp)(!;eibi4RcxGn#SLYz7<-0kt(X^=jNeXLEz{CID7H1Ul#gx-x5UdD3ICGNdcd-7=j`8s+? 
zMq52u0B0Hx4tjP*V$eRm)lLo`w!)i28+_aYr_6uRr&Y|)7vBXomM$i)FTmSm0=01H z7@oRiIH~{ft(YZ&vRk2rm#itEZLnjIUwu*?!t5AyI+_;r7M*sQ1_*iCt!!|{QGR+{ z+^-a#RV*L=8piAGGMeYB)1b;Uexxm3VbbPn9tuNMC;c1RkGG`}{7Rsa3E_TJRaSPC zah~fZ{?qj5&!6Hy2}j>94LE?=a3R%~!E+}+0@quCHXA(2ymTg4*I9OHf>uikO=V5PD1WO2WHoFnqr;2kwKr->u{bYI*B=?Q8;h)KwTH(QB zL=ZL?JQdX#4v@k-2x>z2LRTJ-X)!>i0P_flK=%1Ed+04)o?)sQ=nWt~g~ zT|zZ)Trkw2>GMj#DKO~%MB*3NZtd0@@M>}%xGG_p@`hg>q2Wj=T|6Z_{E+qeFg$K= zR;q)~2X#Jv9<%{vkywee+>L}WkZb7F=NUhhup{hlM}Ow`P_bQS+8jR>qHQUWOv`5m zcOM`7PK8gw3gEk#_^HL?V`_QN4hWaX&F5^fx-5waLabduFAbGi_ESc~m%6~~N3sUd zAlZ5&-i~ph$0n~?TCx4!A8E1RFZ`)Nf3~0$-y|+QFQvt!11TxuIzf|1Jds0Sgc3bD zU4Fh&QTs_|EvId{u6+|kRk;f&SuoxYttMA6TCwL_lr6`-esN~n=o#}8lG*7hyw*`y ziHAg3!k((Z*Pq2}oMZ1VhQ`uM8&4*JTXmt#N)%eehHG=7P`K!^ zyQzcWlZ8$ury%uV7Z*0qi7)0s^tI%4vTYRU<3YkMV?iwoF%Y5SOP0 zP5{Z$jxwJ9MivSGlsI-(S5~3N+(Z8i&%GHsrAt9hS)@9iblvKpX!OaZsG4qM{t%bx zRhZ(ABt$rx?Bsd3 zwXt!sDF;u5_BhlT?cOPM*a;twf8ER2vL=b}jACnnAzZ8Yb!YB|5U)BR$1_FW&voefc^qt@LAZv6V&Kc<7E z^hJM*B-sWLke%O_=BmaxQy%EYEOG2JF+Pp3EviuqC6cBY&NHT6Dvf*wt>@ClH5KO} z$;?GKVIyFgc;>p@JXU@iXXob;Xo5OO{T9}}jOa^Pp!sd*`~DFE{3^^D*Ib8G!K@a$;#qxvvQYyxs`JJSl>Ho&b>zJtRZlu!x^3${*8)FwbG5S3(dZ+ ze`_5i3xGmo;L0T|qw-+Uxp%Yy(OZoMKY=J_muA4VbpXS;knF2s!F!R{^AK2wz=~Nv zaS8PI*}!)fEYz7q$>LUDUFQ-^c^@fFIqO){Qb$Q9JNi>G#jxpLk9AC^V2r`FzUPq$nm?c?om&_O;uO(qUp!r?GvyS6L>9mMQwO| zC?RS;_-m=^yZe4VTibgcpl4Z$uf|C8j%Kw26;4el;4{8NuxslT9wt6%98{3DW8hNi zP*zs+eo`$#JP%zhp@L_qFv%F>d&Sq!QOM@Gt_pf*2C;465|0H13!vj@vzBxwQUhCL zQd}nO{+>z;^<1cxF)IRZiy^!uN2$(1*1H{WEyo;cMI9^Vr5R`WY1KgY#pIwD&8nG~ z)EcoiZZsnm9Q(5sVGZy+adEsF-){0Q(2(+LcuTQJnGSfVx1SNfEvP{}B3hWMskee=ESfd~$y$_@H3DNN=A^om%A{-gon0OgkiIjp%%oo#oD@tVF z2;F&ztClir2fM$<^G9b6H|SXAsa{3%LqhDXeJhxv@!dmfj3%1X20q8!m1DXo^=ssZ$@{MUZ9xlPo76@$lDF7E zp7r|F3ScNN?pwD3+WZbYIb=nk_ZRvOZ+`2e*)N{m6N!j&uk0f~uV6OY zeKrMGh2F^~2Upwekw(|dIXx2a!fdo!ky1kcFb>dCVBTFdfm-*oPva^$?FnyW#R`Mi zoCd!zXhHK?tLQ~5ZM;=Ipo1$9>>cIfQL`_}G{UYe$960ia;N{~W3_gfyGoc+eobQb zUn$)>9cfm!_4ZpqpS(VKdB;5Uxeq0uV#GL)?6$zPANF7&@O)+)vm=p z)zJ71?BJ$baRG&Ug8kYT7v^Cs{XY+Z4#g*ld%-e{WgRdV^<6)n~0DzPdMw^p;O@%+yyy?Nuq zLLTP)Y2zbyPXa!L;p9DL{8j>Vk%3_z%j4y!#Llir%JMNlsX*#iav0O^r;wcKU3W{9 zxj#>hk*gkD&3Vipo`-@aT4(mnMH_WAsNVp}pRF8B2p0aey z%89)6{-e0?DCc_nACFsi202=K#lRwnSm_rA198nQcxx+Z;$eCD^IxL;nv8J6S-9vQ zSj3D~k7A{K_s`QdF972^@b_6L2@IMCgNBPq4ERmo4hL4saD2g&_eqm~!JtkX zpmS3m%*-Tm-MhS8tvCBmzp|5n@^G7Ui-<*eD8~;GFmv}&?{-j^~`%(aB_Xnw-XDm1~ z+e#t~U!A#`IA^>C>kYa%d82;@wnezRrX+JNx#96K+V>>=?BXoLuPN{gc7gxSwu2xI z2aRT%uc)HED?|x=M%?`C@`|SlMOZj>HGxH^g=w*k-s~A=r zrDDS&g|`D`XYraRp&|Of8rkaMC=K@idCPiiEi#K{M_n)}Ei~#0zhwRMNb`!y?tX#@ zMnpRXBlET+i{i2u8w#8_dU%6Z0i6**_8Ei+)FuG*`@prtqvN|T6@=LkFu7o0`*fdW zP|kb`_%J>L^CZ;wqm%Hj^fZHZ0k_Ev`3!R;MbepD*btws@14^EtJvd6U+zODM}EVp zjqyBj`g2kIlj2&<;6iO4YwK8|~9-3%_Y>Sv-!nUo(mjia?l} zE-^2fM8BOmktNNbNGio%05J}eSW8O9N5U;(4G53T;?(6$99ixvwEvnOi;@wI7%^P` zct@RTipvx$kuksz9ZD&{~bxUE3zSDQoAJLV4w<`e{?>qN9 zL5T0DK?`2$epxQS?b|ZF0pRs~Gp`K1j-P-!)5@}1j~r*j32xPRKb0@L#IuvDp>}vZ zKD4)KR6cW-sQ3E84{uxv5<%=`K^ztba*8vk8X+Rb_DQegR_6M0Z6=p8MPBPTO%a4_{WUM1&dE&{q=ubfCQ-$5a2TOS$4fuVLC#(?Nc9-a zICA5@&Whu1`|VPus)SJRTg2a9OMKzUs&1eS_Megbape^o@-72;G{5aaGR~o+5;y8h z!LjqdGRX*6demI-aTslwXRn2;Q_%PLfY%8$9rrGE#zD^lZh^cIkPqi+U0K77KM!7R_6JZ~Xi!7JB7&T8Z{ zc2h&SA_j8vPqpO&3u6?!GY3$=ic;=eeYZxaK3;;cujA$0uwPYYL6U;tv4K@JYreAK z3wLa$viuRw`SS1mbnyWG8BK*`DtzU?t?9qd(MSh0_r5fp=e+=x?Z4kI7XkNbexP&Z z85sQf?mxFdZK=V8PkUEN@y2N9m%;ryMh+$2=k>BwE;vlHOKbC#)p(R(+b~Weuk#xh zGe3-y-Xk!l9X!iY&=wPpFttFAPeAQOS^amAU3j`j)0&y66UOS_vVj0Gta;q zsw4Oo61a6pHUNA7=7A`(Ix2@ynF0+&k0jiuRo_QwcZrKLNFW}gYZzjt|k!5jC5ZrD8eXi!D#qvp63#rBb& 
zwd(P>*f=pGE~iiwwac_48m!|iUql*GO?Qi1U#u^J%b=Ql;_tH`^DZkhn+0fg&F4rc z7jH!)Y;V-=ks|_x1wq0wlaPDI(XXo^Ok-zka(Nr`q=bx6iJjWrlj@77zvx0Q;YJ2e zSk(zP?yLPo_@<4&XDfaUwuEbCX`8R25%U4Hch`G$$dZ#TMdEk-Wy}Z4O zQ=SEru?KjzcaUTi8IAm+_EO>wmBK+vr#SsKF&s|kFgyIoriuG&7=6dELyQq=Yv_md z(H!KmO`$LK3(k-zm%{}WV$;eyZx|>Ss(0N(ikCMJDH5lNZDlurN>L z{ki)ZYG%i(z}o7j*DQ}9FVE`6<=I(>2XH~hgq=-_uh(xu-h6;0hXi#f1zTllUgUuv zB<8U(bX3Wxx#apcfH0ABR8Xpse+@ykzb=O~g+{yL0`<21g$r3`B;rKx+v;0k4hZ4{ z)Cu%`pvk)+6GH8<=n6ACN{WHKr$&XCT1N2_;E=Q9FGNPNCgGb6AZOsPqBhHbm#*ha zM}j-SyQ4d~Q&=01#M}(%)P7M+)FrBog`P(qgkEC1!?K~?`*O`f97sp?54+U%bW#WM z%)uIV6y>s{iE1lqWWZP|o2NI*20bme5B|g9Os5?IK0Y*jL^QmC7|Lpi#YYqJM~&9# z;3nfsPxDc?skhBcmG<134i-t6cQkCr7BgiF-{mqR3K<;zxfR#?DoXVifMH`9IjE#S|sp%r~G zhGGfEkC#?Okk_mV-U3xLWuy3nkx%k&)`nY;?p~!+X7dWqpb3XBijE<&3p%nQ{x zFCsO?*rzxeX;d2E?!on-0EndlHx=5d9C6a41~2`_HUa0V z|9E+LW4l$@r<+!VWD94t3hE70yOW1^WpiCnwcNo}iMwWNcT59oif7-LKpyWFHU)_8 zZgbH@bN|5%Cs7|>gnP5(F_J=;rw5!M(k1cfVlS&4C#KLFJf?#U5WV-T@c>YF6?ygG zzJC18n}9tx+|~Z75UVW!AlYB%w7uVbv;$i`;vk#y^T6wG&xqSQwh%=~M~NM*7LLFa z716XQ=!$ZTDz_`l$3xmsKB01t1^}OgIUu=7W!HK{a81+#bxOn*5Irr}3N3NFS@7k* z=kqI67ab#vF%fAaJ0e7wDwxY(Y!Mz{VZ>5Dly#MwGiJyu6c_I^PVcvFgzrtq1g^e z%Wi?-{R|!DuE461Dgp~tj8wdseS*(|!+QHCb|A0|@|!bz%OQNCcZkhehk4_taIu)4 zU92y*vlwH<#AJ4uDydyy50#4$)jVU~B8aigEu-;&5g&CXBN_b2IdR z0BAs$ziSj-0Auj|0J!^rnq|;#doT!SzIs#x61ExkIiqr=o1{1^78{wo`mElnX6Fg& zlvZ1oFcs0jT6MgR-J$RRH9hqQSj>gT?1JUN3PE8DO!V6iHdZKI7Q7G&xWwg!(;uXd zXoy+rpivZdFFCCiNyrXh07Ibl96_1`+Ux1>V@a1FbWS`@)lx=!@8FfEr z+?%ZzlwRmrxPW)oU-q4!GhfmzohMebnS%w4-Etmy4Xs(Jrq(0Vn3%{G5s7&y=!NI< z%e+ElVkm2k%zRl^>lQLjnwMk4{=;Wqud-IA-_`ZrnQn{4Zw zOzk50*`(}_=qIY{A3xCX^YOJ)(kv!!!hnzy5{Qdv1SjuQS>#suOmOqt9dc=(Wjbsa0V zmZ|F5mU%?=Gy|CfG<LRUL8wL&IEa8xB063=amFU-E8xJk&WIP_AMEi+8|Af+B zLQm1Gj4iIhG>+&_f)^T(I1<>xoRmMqj(`9o{V5NT>bP*)U#8uF7+FA&dCgog`ZXXY zav!xVy^dd_Ig$&`(*`lBMK(Dn4h^f;@YIeb(f2UE0Wu332Pm6Mc2^+0F76sod0@|U zqj7F)Aep;@=7ixxgO3Sy+R#bRY~i20eqtJqg(A^Tg3vziyI$h4CUhvR{n-5fIT5>+Fa=8$eEYW^T;LTE{X*+NSQ?kxzi~T z%2|;DO_V4;BBPtNN1CjV1!9@5(hTNVI&*-RteP*a5E1bJ`GBpIXBJ-C2cZ#J0+5tu z;D2v)m3(HjeiDxLiNu7MG%X%Ufy=ce8#yXy%u#SfGT{^W3F4>AYa8cm6vJ`H`K`Mc zSD2*P7xyfB<#!e(T{TO@-7A|4udLbc`0|FMA9=Vqjq;KM!I4AMkq{%=(0l-=wM~x% zr$Iu#knt$WGUhqfECNicEXsjKOjDHlJCLh0DWO^=W=4+CWwc>dSwVxyND(bLQD4LC z1k@-38bL}wx3yq>(g?J+B49*-J1C@P?h#SpjHs+@Mf6jY5l$9eK|w{8U^*dr{b&-( z6WVF;vrLG1tBzkHnq!M-;J+coC{Y1eIz~{=W^o?&8XbEiCvZfPDE#iqlPHf0+lQcz zOI>f(Eu|Lz@^n{t;_9gIfNbgo6}xWZ8d^E?hrdM)guylgSIl_`x6BVk@_g7P;J(y)CRz&u%+!SjoGpT^CK#R#f)wmD~OrTTA) zB3Vk)744>I3DFv5|2Q*i}CEI&srj{3J|KAEaDn!LR2+5CXNxXA}d$b84z1v zc`9Ubo{Y5R9CreV5aKU@V9=46+*R;O^j6*c1%i}Se=NbbF`DZ@Yngnj)E&qA$=2dg z0J|DAjFgP(4(>=NA6<|P1HCXU!3n3XxPVc2fyb(Ag2Djs8}#GMXo=oYZE$Q@Cr(1w zwxbX@kTG8f1!G-#I62Z7Z!;qJb!!azVlg*6gXuH6ArhzkDqO4uW9^1`BLMKCejhd= z3}Jw}uYn#aTf8cHzDoRpFJ^nH(z)IsE9Cz&W&1tNSlDAQN@SsN$x>fwnY+?-4Txjg zFRZKBoITu@n}%sn^!_Z)2i(Xu3!C@0LlM7<(b_wVEekL1GQ~0ojY@q3CF~=nOu-XE$1Q9f2+Gk-;zsa7un-)dj&3b7# z1Rbt?!$!=<)yW;47=x{+-ZoVhnj(L~?jeSYC3EX0S8%}rP-Qiw_y z>pygNP2ZwkT=~pV5zEk9bELLenC3fK*qz_>930n!wH0QS#l;XRAZl`6YG(gu8f7{ckLT3L^fg^1(Oex*Kn(nh1l$s2E0Mo695uJ1;8!2GdStZ$%Cv4BS zQLxdX*K?Vs*0f1{2)rd~@w@CYnGAZOiLnaA!~kSJ$?@`F!2DQ^j=Pv*fh@2sqd@PK zVZ*T`ENo68g^dz5(JWh%oT)77Ea6|gCm&S`7Ic)KxMc=}4-H0QE_ zKdjkRnU6kvG%ke&%vSg$%+u3w?!c1f6yaYJ^DNkb(doP|ta71D!&KWMdu{kHLqG%_j?HoeXmh%rYn9xnr z>qJZ(Rovu3Z06Ml+q+SI(dy+}53~tY%d7U$;1BMBkAk}iU01%Kg_6`&rSvpqx7=AG zs-vgYq~DLbRIJiffQcx|gr92BXD)VCF7|vZ9ujcqd#|;bXCBt>u9xB9a~$0BF?-!i z4YZAhw^47ZO;)unQkuO1ow2J=taG}|0yy0cS0x0s%hk7150whL+>?0?eNVoSrEk{a zPPK~nO3A}&+bmeMjdERY)nPB-|Asec$M$fGFI+`jS)40O%)Jl&*+yZ@h-sWgdGO}G 
zc=O9T-#w(Oo$=u-?SDRZym@UkQ*1LDnK}h)!(wTR;?nx92J2(e`&%Pb{>ym9|4{%0 zo04h7L!SGm`lNt8|3Rz7k&!B$5&$Bza(p1h@nArN!~ZYh+kc>jBuJTNjFcFd-1sfgSs#p zS|ixT5e`c6XV9O2NUqT|O5P;;{_#4DDWs8Dp#Tv|StY`-&BT@|GSKo2vwte!TzVor zaVge)&2JYy7oIF~FbG%j+*TSkLz<5x^@>}R3<25JhmT*B7tP0a=5hPAE-a{0L-EB? z+M|$BOYRxs=^#_rXC0?+?)40Uhql4N!+y(6Agy+50p+eY4#z~ejO^YF%jHz};YGzq z`ha&3L)0}cT?WFo65htX)4b4dw@N zhF4>n<_`#?JTXF$>wxYOMM1s(fN5IqVa=MVFFTrHkfdccD5cX2%`a%zatu_n%u~K` zY7xS)EK&95W7FQIWUPSKje^N*e##qpxP1~Fr%5+L2Lk0mYwhR;3P=%-5jHGHOShbb zfxH=;?MESH2QQNm$M*?dgi#Pulr4Zr?ndkQBSpdG46tCOCt7zA;TS{sCZc;84+Jsd z>v^iB?R*nsC2qk!-)+5KP5m0CCLe*AT&=S5r4JpOs}NNgo!WovqtoyS+J*xKXA z2yFh=G^~eon0b(>O<5cdadX;t9F0h!O#b^@(vW@w)|!IV;xRYMB0i<_n67m>Cys>j z97Q>Wkxt!3889Tp*Lj3u66g2IFalO5^{OeqK?Z$EFfs>-@vUyb=iR}iM=8WOCN2pu zY$=)K@E-tmaypK>9L+(Xu%$*%{7;bPWJuKUsSJ-(-dGN<=-fdODCeA&td7!aF9eze z`Kl2|ZT6kN6$2WNu8G4FN6K!BJG)9AA{-@U{)!P27btSsP zaVJihyzJy%S)+hP*qN{x*hV?LsB}5-pEC(uwT8;$e+|bXYg;sG*vGY1-hx0BGsAbh;rv3+U5EcaXJ$Z^IPh zFTefv^ELi*RcC0?*09yia~|rO64@Fpe2w;{DP7nHrUR@tts>eJbVI`YZ*;BN?ZRdn z<($VwdwyE>Sxrd__3U#bpn#=bMI+h(w9~F*baF*%qc|9R9VhUPk_@{B91}5ZVc7F; zvG|IlQ!Ylbqq>j+fo@Mgm`FJ=-z0_7OsU*TXx_ZTQ-fXy^{{t^XzbNDgZj4m9h83N zSr@&xq;zGKuYZ!ff-@Uzr_uKzhL9GghGALIWCvHX_%<3{9kY(5=UtMF@8D=>w=@@@ z#<6BzP9J)_#0ezRAQtk0Rt6O24{Z2dN{aK&@%GsdPg^8ExOdu<;h0B0AH1FS2gk?T zC))?7l_1k7>@5Ov2DoX9^nbNLMTx~`boA5q$>HANyVK70$!_QSZFnU86?3|kEQfY# zp0*m^o+;gX3YIVrBrE|!XNtA|!QmI=?5?&ce!-mxy7GT0_R)GRJ}%4)-RZ+!S*nW3r;u zT|VPE()vt@upsXw)oMRt&;B$T!uiq*CZjwa;8hiNj%J?E`T*r{5sC;Lp+7(7u+qTJ zBg~AW{={UKY*M>SlFvG_Z3sd^$2@N++I+$ZOxvndwfHH9ia81fOR@#B*^_Co zFGUX6VUZjR9UP#P!ciU$4YW9LG|sgrtEL7NTIBeO^+IrV(=C;^)RsYgifk|`+Q%R- ziwwCyG7bs5)gmyj9~~YQq)eF=NOT!lWMt2W&wANr@R)!Ik-~|KLZG#88xkaJMAIe* z*r;Ar*NBM&jcfBldf2MW%XIP0YSTe zYZ6egl0$n`a^JdpvuyAB*FlD`oTjDzn7kDZ6SD~#b+WB%gD&O>R5YL%i5VpJ&1rZq z!wgZ>FqU*g*cPYrZSyNL34GZG_P3+3(r9|a$NNe83Y(e^LEvuqf{rnc(}xoa{g@++te*Fq*FAMQgc9ib^dN6}A-BJSgOB zvL3q`n)rnh{gj;33QQ^{ZZX9sQ?#*2!~1y5RQ596NMrcg4lRN3W@?dhR2_-+gfZFi zXL`Gnkxy|CK9MOyoROn3`wi`ZHhpGi+7j*HJdJ?>E~CLXApXc!et`+PrO7f#VSdyB zoHD3C04G_+{2f3n5jmVtCpw_k3V5I^FG*HI=?j_QNXv8Y-n|g>OpcBJnZzk(;lm7v zHoY}CUvz7U-A{`wk^^wFQlmb>|MxhL#_2Kii zwR*XUz{aXh4QMA+K@8Iowy~m8{BUX@hTUM`El_t(4qiToUhIBMEFen_Fr8aO%eBfB z?XP8fT7m&B1!|np=|fSFS$NwDo)UuiuIR(s$EVta!^^<*=I?iv^kbwK3z28BC`%FQr5obog9l)D7NjW{_I2o@@ z#;ZB#1|V&<1I=}W+puUvKk-V2#Km!%T%p}dkA@&pn;<;Ta^;!Twp^9?HSbkmTRluv zbgOUU^kGcIQB%&;dOB=U8OyUFPU$KJ7DCx_k{$-)((@R(e1^LS`hIhZmqj_JA~qAl zkMRs?1-DoOfTC%g6h%A{WIH{hb6ZI$>#v2DQL>`8v<{q;l+sG{E!?nKUwDy{9 zp{Ilrex9n)J;emE6W$d(PNiH>Y7b5ZYTE%bmTGMgMf+li16yP@8H%M1hWnJGc5NOs9CoTMa^t%Cw2O*;rDL%oDT^_ zkG!#??2TO^)4#}@_lB<0@1ydLPHhd5?&iI+b7e$xFF1#|I2F_b$dU_qe=jbB{&v`u zPa@Tu$=ygZ_K5~ly&#zH&d+-deT?h zP5LcDyUNmIB(IjJ1(%_gx#XOJ^-pHrrI%>6PLUxp))uhO_1bT327+lRhz zGSsHzWg%#nlVLboL0df)lOaX4VWt9ud}oAOFO97(x1gAU%0y@6ciTi3yx(iF#4f^d zj2?L+^HT5dBO0heR*{joNh)7iSt$z4-OB_Wb4C33L}k*zKxBnU{E|$h+7ZdH6wQDi zNroG|LS^CoHA~C@S|!2BC;dLD1fFawNjOC#DHWncbUxQYIyEhmgC(b@Z@y9DFsW9k*gH688D|hO}ksJ zaaZMMuv`#{5G-G7V>l?*ZmBs*9zs*sD`?Q|gi5p7B<(it%c8*|^^*jK?`sP!p?0$c zRrf?}MD_LaFs;+QH(kJUZ2C(N;4ldq4OOzI`g=h7k9GDDy@m=5 z^!pUN|*mpeka7w-D@)kTDLHDT%dR)bdH zc(8l%DlWS~*IfHslLBW7_!1JBuQ%0t;T{`>_kv1NSrvhHs%NJxMA#wVG&%nRy;UEMuFcT0u!CBc<=N+6px-py@VJ_DI zROgIj5ICoSu;d1Z$HMMg6tmMOY^h4HBw<8E8B`2QCI6+;f7Jxm#M6j^oH$LsKMUnM zAXkrwG8uzPYC1tf3rv@tOH_~kjk$s&w&;eED{Npn-nSVCgXUC$oE_N;`Y=7#fm(VR z&e>Xin{S9s5L8NT1@eL|(R0X476j95w+*py;TdL{64esAZCt@{BsX< z!ndf8kI+@q!ZY2}i}_<+87%N&n*cPPwx5dRv*89%cS)+!R*Nj|;W^rEg9_7YJpJ(0 
za%!M=e+yRDD`f%x7}l!M{eZ2%Ti;w+|F~GTD;@7=CgI3SJ)oq&Ivkisp?uShr-p8_c@Vp{1q50ggOl2E-u`Z%~SKKtePf{E}`4UY# zZHovIJx!FOVbY6U05mwhGk9%M>e?nQ0FMjQUKof*602%!CP6b zr0wiAE((syUvwTyz6>=HdDbSziOz+M=A3Cn@wOFjVR1kCWoIyRy0h6Bjj}oYJfJWs zY|Vj+hEm*E#evSo@3Y`i32sjv0EW4Xt9zn(xhLkJCcH{=i^K7KM$9Mc>~mk+N_FRW zLWr}AK6~h#X^JBq))Xf(h>o*t98-)6x$tq=aomkalvwFYXTT2YCK;dZRJfRliW*B8zcoDTC^~;^--1w&;U;(kkGjbsbL49W^sx-d6D2P}) zO!ZC?|C^pA;L4%34`;FT|h@}w}242oq^*J?#6DnptFKbmW=K}dv+3REQ%xfk0d=*Ns zM(mg2o4Wrh&gl?Sk$0NcOYXVkp7-OPbMA#X`6nfFLi1!^j7BdkTVx`S+sH9*um zc43j=anXuzWbhrdr%JIiyUMy`K$0|!mb4r?Nz%hMXjQaJN~Psl%{2Ef%rOe*m{!e< z>KshqdF(`r2C1Znp)H+=li58_;eNXP2PXr7-fV2y4WJ z_;r6W7~KBZ>L2tZ926Dm9!j_^#EznFHbUJNqe3r0$rQvLPwXaZo4k;XoxOmx~lM<=R|O1#V@{dDiGN4H7m$;Tg*|_!E<$s z;}o(87YGy7ZZ)2^;Qwtt)ktQVK*z(5@+4{0S61r5ckC58<#Lr~4*#~?C7Je#8MYeh zl*Zl5B<@nU+_+up^IiN$5n3`N%b&9;E=81{KxRz;!8vCOl6^VJn@nFS3l(R$u!=C1 zW+%f|?M?@mzSA+c9{mBD!A45f$=p1piZh(qE-z>nf_-uwVIHgit)EtmsrtRfWMA@b@L@l%E zj7#pUwYXnCE+hQUJ1Q0K9_1BB82}H;>r{okf(wG7c}Y`2E}&Fr+Z2R%%F;I3g;fP& z(pfRdA{APP###1}fb$$ilZTdPTpCHTh7j3IBWt?5ZIW>kpbY;gxW4%WU1k{8`SLKw z$5=}@ZlhLEPwQ?*4?9_k&TiW1t495HcaUVtZ^f}V=r7&0hH9mw-mV3@f(WMENmb@$Qvh!xI9oraHB;Wb zxyodY*MI0hdW6oyp^?f2HeP{TM-n9&>hd*LacHtnM7cfCrb-Ys8L{+k6{oWUeJdmY z?f`Y>FqZOpdMLVX9+tY5b1uxS0fe`dQRUjWUMW{98=<|J4!0Jju<9x}cpYd$9Lh@X zvX=<-I12pg?MxfY9z%n2DqO3tg4#`otVh~oe9Mz&9$106BBw5Ee^JgMPPi~GN!#bJ z3iBBi)pWEET};Yo8-8MFU#z9p&IPsS#^u?L9{AFl@MO8PH7py5XQr89)OzfJSzo#c zYSiD-35(JhD`|z~=~KNqVe#w~z`TYaT(_8Jnw1D~mUisq@UFj|XfOujlt z`RsMoOR5)mo^{Pc)6)D%C1cSGE9tC;q)$*T8rGackrbvrx4q(nr}9^9Nt z)pcEWWCS@V<+#Wyq*`{({4_l{;*%=FP1%t$$!jpup&KfCh0P&ZrBLLGr(@C*o#e@? zXCn?^B@#89Vm4(0@uBdQ4lyjgDbI^tCWjGhqdmJ`)o#j2p^GERH{}M6uqv#Re6#Qb zU-MrW#}i2ihZ_$0q#$uM+eY}4!?q=nh24-`oweB}XqeMkpn)tWU|2XBPC)94!Yo#Y zxRSfj+(>{dYj3bA?ypZoyYGDu`n8)$($gkf7j@7kuD=s?qR-ip8c+ZAbg`oyS(5Z@ zrSMkJTEWwD7ei~bRxZGqa>BSrr*$C%HzIQzRO}q9ou8|~?G*{!MzdnujCb5t@!6Dt zR{vF;DTnk~M@sL*QytXz^kZ?f5(B7Vek_>riEmlznILM@Aj-5I_f$*W?pL)uj$UUn zkD9tDl;hB=LQ@{giX!E!)j$xDLAU+ERtSTY&gdxoL?d-gry_fFNMnde!a|)l&^~5Crb>LL^|B7J zkA2QU1nd}yYIPgKA^x{ewbQ8WVV0&lrJPkOhS z4wY@fTFo}VC_D@rdS(@+5!9vgrd31D0?-P)0&127DW9Tv!@o>EYrwK}Ab=eq$E3CG z;MvgW1m6Xi!x{c#12*6cRk|Qqm8FaHe+5s;w1CS1M@vOVt>9O4)*BnOwB|laO4WQu z2U;6`mDG-HtIK?Njd=pLC0s@Tk38!!w5w^B;OcP=e@C-eyRDMb@ke-}F#kO6#7J?)MlTsXR)W9whPdP>Y&#m)D(K?sQv}#PcLy%iSm733o z#*3R-P%sT4DGegApV?#~&hi`)iB?S;VT^!p5d+^=p#bPC9$}}M#8aC43(HNREKrLX zoEJsl@i@m3#}wYkl)Y|W64}|w;4KouLO#}C&=54s!tT|f3HRE+Ca%ui!6Z;{kmpEy z11_?M&5lDtz2hFP1e-A69gbkLbA(XFbhS-ainNESQ*6UCZJT$_9>cz zivhiu?3xmVh-tiTA_h_kC#+LQ7R8U$0*=LW8u4)X2ATbgw0evgT4`wLi*!lFBWPXl zdp8i6BZ}FOgX>G^!4*$P7~s-#Sw0!ZJwYkPykwJ$3)BfhE-TsR3^w{wBeN2@0wtxu zoMc_IIG==a7xSom@+VE4Q#oag^QF`=3nAon2sq%E6$B~lVTe~ ze2TY2S)%TNl8oO+_QMyhxu0}MhrPxX2(GeMCG8?m>9xbsM&m< zQO(^a-R^{M@vbAyU545iYNaUn^R%<@CpGaeaT-ctw<*Q%_<#p*v;wUK9LXU5_=O@m#S zRDF~v9`XOxy~>PSm$7;(jvD7!gy>ozM?Xv7w}U?2wCZb1vywX#&$q$Cu4B?c$4X9f z&e@sl>ZIs{j&?VREkqv9hj3^YVD>uFw=nRiS*uzxBGgA&F9Z4axLW3#?3+xj%i^n1 z%#d^6RVYIIJ!1dd@fQ>3of0{BnpP3aBTeo|)k5Ma7Aql1QXWy1>a{;D|9$a)?bY_`-;cwaAEFQ>=*Q_} zjsI2scWv$2GxHmtudi*aum35y`4R}2pm7tr|AVgo#;ah6BKy|*tLJ}tvGHR4uWRiW z8_(CD{bjiw{|Nuk#?%?a=bf8Dc)qgUUTd$d;)}L?uDOmfG`~MHh z|DObJlkqKw@HM*4U}J6lSrEJl(@~aV_JcGyP5SwDl77xw!G7Lr*J|RXdzwrz!1pcQ zCVh|5B3O`Z!k$NRYpy*oDUo;b&v-BhgX78hAnpQ`{kR*AFx%)=JJM$;D}s+Q>V|wLobt1sLDHo=9k*I{*z1{C4j5DW=hMSe>oW=-+2YQvawJv- z0V#=z?#EQ?gbX{DGhWtqy+>t))wR;40S>JpUz=i~(#M~%6UTXuumxJ=*DQ6kP3Xm?u9U?4Od8mcl0tEYEK{W4k1LE1008 zl#{#IsSA4%0orQxwkY z^pUKK_4YrKSh>T>YL$V+El}59NwgsfwVRjrpQZf&2bce!|MkWCi&snee<}Yf`5*m+ z?ZmV}iBOo^zU#J#`fD0v9?8MpU 
zy-tU7&hUNRh16m|W;_Q`FP;n^WZ$JT#Z~zlW9opFJpC$ia+JBb+zB>QEpBvZ=`_KB?`U}_ov$6Jk z?Zwjm^OfvBFDYF6{U|1lJtu3-`#ik-{aAkL_4?anbOmHiVa-7PX~=n(F!6Rkar{`> z1??4?N?yzrV0URltl+(9uNGchq|pV&zQYtooNWtBP-HDG(~hysI0Kc0hM-GY2?bwz zkE5)HA#PW3Z-RA4TERbnGSwFp%pJ8U*OsU?llPoU}Y-}asM*G^}w=L3>}_*3Ml_Gr)^XzkHTJ$ zi*WDBftc7*!?x8X6;gS zcub`*4pT~#CfR@W{8lY&gN`IM*o;OZ+DImenLgrJ^#c!Z4O_xrw@y9LY693c+k%S>D6Ubt+LC){ZjS1cM^x z%CJUrr}=PxZ7M4>TKQ*`CR9H%pmLbhML&!OCcmYHq(I781(=gQMePxGbWFvvC^ze5 z3}XWh4BBit3N3`$aLtC1nn1YW7{$Ca0uAfnNXo83DWR>(N-|nGkKh2%e?{3BKjGs~ zYTE&Si3XU&SzG~3J1MP+7^6IdSxgH{snBs9dU0x|_vt(WmI(-sGdeanfg9`RoNNr% zP$*H5XRxMLl#(PeAZFJ1p_hB|6Z&j^67nE|!kweL$N5o<7Mz@2lbn-&bj%ra+UZ~b zSIOb@v1am)JNdxqJ|1yLIe=O&wed5T+W1*9O}qJ0lY7U&H89HF7NPN4TTRAk)WJOE z@)xj1>rcj$oyEw?WF)j$k(9YS)UAL(ud(kpV`X`Rb99Sn)P3P17Mi#Z+{P}(u{MMA zpbD~!TBi^9RaI{V09CKN(lBPR(EZr_<>5w_kR!c1IvKucpjH)IsNZfAUTJvGKP;-| z!UsgtxK%7Vhz14MB=WE(w@BO-Of;C~n&h=Y4477g9bn$|dRbeS8q8N7 z*;Ib-5{UWUqog_dcNe;BBp#K_F{Ub*m6TTr(;_Yd#-_i@FBQpIE28MJ`Wr=r)<>E~ zE58z6uRjr*b=y}kC#K?P(DaJ|L<6M@0Hf0CfaUi$_d!s84(K!6W}uusO&#W2x{a!W zSloJnxA&YB_9&XFDN?(d=ydJfpU-w-s7IhcNgyn!PiCI=mi0$dYegpjZw`%!#CnmT zNff}d8kLlm5*k5=$mnQg@jMK!qrqSW6N#b&AtnI@x)%*bs?P=u)af!%>Oein+RAzX zE3Nf6>?zAv@wo3?On}U_Se|v#2p)G>cE8Mr14&}Lms8=s%aDW>rElraL_VU#tt^?R zBrScCp(m!G1oUfC$hNeh5co*}D5cJ8;mdf&6R#a{*$(Q9ABO84p zfz;MAXfR3^f=N@tNh&9)m*q8=Wf`qR!*PDw;>!rmjOctQ1%c@sE+2ss7k(o(EL5on zQC{#POPFIi`4lv;*fGe&1H8zgZkj0IiFn%MB(~H91w=grYTwfe91GCXB}vqaSun+_ zluEs7$ul+DaT2r4A1W0h8BtZJZ6V-Kh{I0=(4s%mQy+r}A4jre?AyOP7$nUPXi51e zX5Qs8<$9o7A3heuI^6qTS)}7xAeW~fWiLCDAi?^}ZN{{`#Wd}W9uNU<@IT&wG&|U` zSOX^$zPCJt)bN#MAx4&IEqI>MzT*bq+xV~ilCW;R;l-$IjVr;8Rq{-HVvs_4NOdL) zy)v7Qb~7$J<4Z_-K0$y^zy}lp$i(Ff0K~z!=-vDW{~b?!oDiM%_`^Nv-*9?Xce?7h z$CsS%7(PhFEXIZD=s+JOz34%?TPdn+Og+d>R29}1+L5${jc}}%t(1lk{zDUyjJZGz z@*I^n{;T1tCHL0o9Tk4mgrz253s-#Et9t(k}Q&<_ATWG)zl|>Zye3Y)B ztrja@#hE1ACqrPwakx>;hTWinu@2~D(hSn@TGeJtIQ6h3tkv=t5pP3|&ANVGXPHj; z8(C!P|3Pn8`u{WfTfIp`f=ON{Y0v(ipVV80Px-|P{{1&e-|}{nra+B@+muQ#L zuSBL=Rjm6K_Q*5FGJ>UCq)&>4uzWL!$*2hEea)gCCeVqxm<$-?r@mm+0uqh7>Frn- z%~swrQVTP@l@bow7|oDeJd<2xU@11U%T!gteMq^622v^5ShllKhXhoTXbM7%BC{_e z$T0G@0#ikGAyG!)T7P+FB$+CjjpXP3nV_-|Jc3AaM=`?%ZsVrf5F+Ssg$MPtX_sxb z7OEtzl!h#Y8cpk2gf*Wyq1oaCo(5_K3*212u*{5$9ti5Yg+vTBNfI;EAI&KT$&_!F z&K*ntuciOj(*JAu@814j>{Wc%JP_0Uzh1t8?e6=3tz-Px(*Nu0`2VkLtZh8|BLo4| z-jdIiDmTcGesRiWj@{=@qx339Z#oQz-WROBI?HC$Ac`OapC$}DkxnE zh~o#B?Ng&d)s_m+jxTRBG}~+6Ew_kjr*m}pcK7IT{}+;>Q6KcgT~}@%_G!A7%nOj@ zAvy1j*o!SnTUgqM%BaJ1sB+_EDAQ6?0_+T>CeMwA=MS6*UAHeOsj#&4xuu3Qzm#63 z+7XgzT3M8@AtRg*oi|tm8+XfM!BnRDTMkO5T#3-sx%adu-+=6 zOk-YxcgJD(GpxyJj?QTpw+}+K`bJKfQZj+g*JEfo(L4<40arfkrU{J5S>tRbVO9~P z>d@E3^gyslEP!jY=vh+4ZGpreQ~H{FldWjOC5 zP2G|;o&Ng;s7Wx?q5kebO1H}DZs-VSbtfXS>47Q8e&R5iooEo=7P3IynQ#CipNp8U zDWH5!rkJX88{Ze_gB}imE?6>~j+CFWA*B1!2;j~=3*o_L@c!fwDG}u+8f&nE^MxM! 
zC@*6Fa7d024gJZe%dVC9)Y#nUt04wL;vIiDW?yo&#O1VMCgb2dje8g5D$ZWZu#5Zs zxEo}b;TZ2D>`}Qij0?VnL;nKwOY2}|*jj{oVT3O2BDU{DxrQ8>Zq45g)q*(RjZPVV z)94oESTwBeZy#1sY#%{oN;n(92h5}xID$Dxr)FGN5Pc&ojT-kQ0s_T;JTSXCFhP9> z(I)kS>j1+Ef6W^LAxmB494i|$V6G|L3wpPJUE!uzacDaVW0#VU^)}1V1O4v%pkM-( ziIsmjh0JtWO$|WG2)MRpd+BFUD3c*AM@O)~Fd@olh#2cR%&1;Yg-91wBxutX6QTXY zA;ZJu4eaUbxR+mkL1F~f$~Z?Jd9DUfpv@9;#LKdaQG)z6?i;F9>;aHNxSG`i%VwDLsBjS#QixWh*D`=xFsVL~rYc46Wr>@(^1aV&SD zU7rXQ?fTCh+WpVT@I3h*9R}`8w2aRz_yMXveKf)SJp&!>_!gU;(kAQr-z@)L>Ba4WV5lBw?-KmJeB?*Q{ zOQn>gg8$Aj8kvQzhthce-Dw7y@zoObp@0{SFZTP*`FFx`l5Rh{=LKO%8P!8GP+p ze-LVO@l#&##V5S{;a-Yp>&ZRkz&siZw3f|@(%7|B1UxJCKz2JEMrh&*2e$P_%-0g^ zj&!-o8`jF{ki{q%bb&a+E`W`*Jnm*e!_wF>x@Oe9#pt=T$$C0=(wlTeE*&NqN{`k; za(eywU>{+Rv~@95W4fj5xT`3=6%Wf*=pUL;loiJsg>ec5OEg7hCUQfjFDR8d{O_n8 zXeE+t1*71<&c~S^Cp_R$#e+edMZmLq8I2tP^#=*(!{Bfvxj$4=(Hi;MuD65tSrmMZ zZs~d#LCH0BdNCm}sYQae{*%J=S-=hln}$TSX6O=y#_ZNQ`EBTd!{0Q{t+n=xrU7%; zjb!@a$Tigq|Aqbdq2%m;u5r6%KzMBwrUPgrzYFn{sr-%MJLy7PxltGhr{1U4aR*?% zahSFl#Fmq`|L#oP?5g>63w^qnY5aMs+|N~C=Y{|<_C06~fD+&e53@i^I}frvErE-e zn*|duCZoKA<~9#5@`=S6tJxxQov8nI*emQ!bl=$Fc+Hu2LTG4VO{kYdVhfklC82fw z<1^oy#5!4aP5?K_IMW3%%icBadWyW#p`&!m5|4m8hdtrFCL<7hLOvpHbK*7q$$))v zgzZ`I$!J6olr_m1p-x;(R6`Jc*RmAGZX%L9f;GANnPvukB+b9lbGn=+nTAVVxHm*$ zJeZ`>HeK4yLB@`JKo^xLBsemHyR;4vDZL;>*a0q6~2?HS=yl(x?SP-c3#{A{#OAi73#S2( z1XKq#5N`n1%GSThls8L=&fUTBTiSwFAH4a@QNO5|QNV$_EyprtT{+JDdm^NgskAmW zv0%kC&4ydg^vg5`meGIs0=f~ zHVbycL|nzA;P4a=Z#b87xllKdWS#Q#Rvbc-v?mmtwOCM$f-8zAA5zE!C$sUO^y4(k z+rjLqIV~0w@)V>pWp{BAkCQMaqiNM2J+V$_^r&0hqyr#IJ-&%AbYhD`cRUo#CkHxP zy54emrBP8}mI&gg@3PM&pZR)x=0Mz_mds_zTozz1+2kA%)Jgh}%UCqW0H&gUE0uCP zc!OTXAS}`K2S&Qxj&gL+2M%x%rtD9Bosii)(`k?S7Lh%aBLG8!5URgut>A<>$~(JF z{k$ImpnNidZza1&Dyas7>+qJd731y(#j7_^c{^meab-SO$mtgzSHQ?D8^{_lEoR*Mb0gLjmq zTKlmnvdu7ksB$6JnRmQdwC&p|x;}GX+ZR*oFWh(bmDE~$ZCx_uNEoiBF&#U?JI1!ux&J+}(%cKFz8MSDV;S*_@? z4~&Q+kjeTW_@%t7m0e7#dGG9gzx{sytn=olTG`#C-)w*TtX6hI>9;%HKjD6bG?3wE zisK_ONZfQp7u-=Q37#&a!5IC1VV?>NKnSaJhr&q=YgJ ztEeJQ!%gUW{g+Fj9vsY}Jg^B=mSY@`qc!Jff_ASMcG`C16R%iokr-JLF$fQe>G$5AV^Wkl<9-7YQ?@IYUI-u zzPTd@Ex`-4BkM2A;6gGYu?YLU)*Z}yk)v-%$>?@SijxhVWt;iL3^u+P$OT&qW%^?_ zHFbUl1S)^}51gG$FXl`eCxEK|if*$qtF4}fZ9E%>y=7#o-84Y_$}2f|5@;{iAUYRq!*Ptw;NFM%YVD{eU8WIjs7Lb0OT=FA)iNK8uYM$2zuCOZOR{Jm8Y1PP=)=cA&gzEUwjYmui?~fSl7XKW9!%${W9_HlRzq_ z&c2ac*;3`YB3_PlnlO$W196nMWI7qJ9u$wc$R}T5-OPE~i=JCd0!AeXj&~2mo>xx* zUFNRx?f%i}?he1{g4!PS%)2++r+aVZD=bgYtq|8;hsZ)cE>eSwcrR6l;Y?6StT$(+ z^s;IQBD%Jm8a7x41Rkq%Yf@5h1HV$_Lk+4}t9R;=tL4mT*;&PPzsQtFy>>r_F8TB` zw{x@ZpZnK^?VM>JXz9JfNwntM!u{*A3}hPDPHh^*k1kI`n45XV_pg;EGnqFhnwy~> zzbo*7-t<$yxFGdb$)-09VSk;hLfRa(h;4_zts>+>cpIfuhkM$983qiJe^@3fPD`S> z(^MDaF$|6T3`$K<1gT<}DsOB1Wc#O?qIux=W&hbiC(*$q!+XdrOU-ABDvvOlW%=T4 zB`raQK@Lvg$HTV|lC=(NW~+2EJt+r*cA0?`04D>`WE4Pu$j!0zT!wZJvR7GG!n1-f zDlabQ4I|QpS+;bR+(!Ls)TQUWy5%@!n}jPbm8M)Qm0c~(zFaD~X0tDsxIbQOtVH_t z>L>v!TVeI||NTF!sZ|S66%&LfFuQhtFu7?@^6;`9^(HD!y9K8W?V4`CtLO1(6|izy z+VLvk0{(1gm(6-*lG#y0?Y0Wx7Ia^qs2B(NbdR< z%)_veBuJ!cOr4b5cZgJNPLCUd$;x zgPuz5&0%mY35v6zWHCD6U!&psR*A|$HTXFP->{OMhGg@m0C!FUYQey30LpELA#-UI z_HKh-f~D$po7@kXAnd4BQOAW7KN5}es?6k@)d+I_xC4)R66|8^(uAV$IY}C&lj4~9 z_FRfxm2S`C*3J4lY?FnoDKj)H#q-dTB0mj79AdWYx6%0W`_o6~iP((^jwgN_AY`iO%Q12HX0va1kAGN{&yJJpUX-o? 
zWnQjGsLGw#?IqTnOH06&aNTUuO9Gec7Qb7~x+$oo88czv-oJky{3Q6#go1)}G_Ueb zA|jN;UxUzCDKG`dmdkhcw+}6zgE=-Frwqlx&6B3v`Wz^v6ZO&S*1y-n*OU=Q=jP|~ zh=oR84n4PnV#2GswfgnuYYQKz=vRemAh~EUAp}eq3!Yq0g2VQkHl7K17@!y}p_ze% zIiE)t@o1!~Jl^lX!;RQ1yV#t;))x{cD_N(-9bt6VU$!no%xxzD?Q{@5-sL@BC=8OY zN0Q}agsci2B8A+d!bI4aN$`1TPrGpV0|b z2l~kZgGj*{K1r;Qd^&0<9BOc@{Cjz*kpe%s<-~k?XcG3ThDR>i8kFvs65hy-+I4iU z#A#+7J4eCc(OGc({wz2>{bBX=bU!#g+52((Y&ZC?-Cs_Fy~E(_hur|k>HEF?-9Ph3 z2N?X%)yxOz4u9)j(u^l~*0k~V&YU(X%#epC3Gq=r(n$r`0Smnhh~b1?szxDqLtL}+ zb3|EwLG#Y2x~NL3K_dXn43Z1%D5S)d_tH(d{P`j5qy7Cw4>KX_6`&C!5jZov)9e}@ zKUa=|vX<(6yS~0|vs~34Mfs|t;j9dNnLc5kYh|BQpJK8`{#l_YYs*>0-`@G0ryAo^=i_%yorcED&=rS8rPL$)z6WG?)772{PGb4Yg(c$7TJ; zW&OwHzu$5FM?5IbAE*A~+WNC+#rltH%leOBt^VWhzVu^Rp+<}@Z>g&+c|RV39F^(H ziHAG8-=8{l6UkLX(iEc<^`>D*sdsh{k9Kz7bdF9@w&$(1l7QI#ZufBaWbf_H?vE>n zN0?mqv_wo+caS>7dD=y!xs2p-6fPsHq1bdXh~WOfGK;_{kaP>30+ zX@G?{tV9!1+J2&>>@z3A16T@wZN8{vcrb&q-WJ2~#{7=nG>QrI5tp60Ics;YJx~EYhfX(0=a^xpq z%kzJE{x8q}<-ad+{^M=-zo7kRfBSH{^#=}MG}K6raX5{s zS`=A10zk#+3N}qdo_Hyh>NGjkpfNgrz}L;q+O&FNfR7ZL(ID6!a}utgW<(652V5tW_-k(Yj(0}+rwn9 z|1E{t_QAV@vyxHU*+o`L#^n0&tQ!o+*#YG zdhwiCXJ0A#)Qr@xs1sAtlafz(KB~NqDU0Cc)9Dx7DDh!!-oROo__Yb>0f@`@FeuxX zc3{%WyVAtWqLQj`nv?Tk0}dfD9)Vc)Ml|s~Zy1 znXO`YB&M3rIN9=fG)S(sp3wp52-J0OK7ywma@0P3d$M7n)E+x@-W!!rjsUKLBq z?(ai(2p&fHWzypiYm(VeY5DO0v%p~bd9Fu94&BKpxELho;Xu{@1Pv}Y<)4{^|AQo- zNKr8=6{hp-%9nh(jb6k~sXZiC=n%C=iD$KRL%gq+(W>6u6~tO{SHlixI1}BFUijDZ zIv1*Q(JnXZ@IM_`P$GxDgLmSw-pOu9-Hua3sa8YbdiYs*e&gFPy~tX@x8Hug#$Pfh zaf#JkdYNBE2CvKGI%-+`{iYwQiV0ch%?I83qg9DvQ1KR^mm{xjHaESvX~Oj-hC#<%Ww^N;85qpAL~8hZ{=ZMt5{KEcfcVn%m{RK30&8lL&_tX^zF z_d3ECY6Hykd9C0-jr`^f@DSSI!}um1**-9VDxb!YHjHFnC}Q0EiMS zjxTRBDvFG(1h|y}XEP6E_}DKkv%@lG_z~gdQ7gE#(k_(x7L}zd7^l%?l6=;mjgu_5 z9u!q@y8Hr+sMD!!ofKXQKf90z-M7(Ynr8&FC;c#9PX6_raeop7T^38iam74}&4LDc zZvJ8G(ruD&!{xb7DAtNhlmBN5d;6XlFBV&Lyug}`&MCk<%e_O zpT=Y{XCk4`v%RPxeFtL-RxV{Yrg;H~U~DRwV;hW*Vhu=8I~CHI1Ib?u1xl|+iDAhK z=x{1dx(2%7yWlB`U29KUdI&AqSrkw=jnDi2c!48bH*!Rv7h{vBY%9l-MQrX^F(xI; zFjw;B85f-G6G@>ev#etH6`RE)XNffcUHgA;w z+2gh0(B_w{N6e%LMi)~A|6clcF73Zd`|r~JyZrY>?7xF>6y4;HlJPb=UQgyt0qZu+9y2?rYK&oIO7yF;<>4sM16)kT!2 z5#_(#J=2K@L5i2s$|1+~X#$BQC%oR1#@tpH{bq(bxG5=sGYfKd2f7o9tkaBzV6 zEI~o|3{t*}+lQ@4=hURTLvlxSsmg(S{m33wz9!~mRS$ylsum6e^T+EF!uTS&DF!Nu zs^^l0&No=wQafUC&B5yAMw(2`q+Dtx0G_{?1HqIooRuT#0a5|^Y48`>Sv{cHA>cU| zn!U$1tJ-BVvFSnMW*tPYRkqfAZ0th?DAkB`b~n`pEZf-r4u)OsKU%8ROZ|VT|1b6b z<-eu=uk?TRVtK6mA1|LhFUEf^^M5S;zaCHZ_mgKzZX* zySQE%4x^RqB9q!9>>Xu3Mc^z}s?O0_%6iY4J*p;(wVi@S8g-M4QT$JIJ-ZE#FT+uu z3=bofJ+X@yGd4CbtiweH#&~dt7$m|&p_Cu=Y!u}yp?Ua5KRh=N-|B~5^KeH$l=3Ru zhn@YiT}JoHAdh5XN0b9woVBzwogOl~O1UGJj3g>!NeOYZ-=A&&&^cz8D$mWTEYXQ% zIH?=_ZN7i={_yPm`o_x3=PUAc{mY2qOZjgp|1IUerTjNb{=3KZ&+Pmk#rWT~^^K+c zx0L@L`Rd1Y&i}U5rQdprcHrVua!2wXlQiW&ofO)oeKOzbr%U_K^88<(|I2^Bwevsi z{qxSJ|IG9M<%@OC{{v|8#jEA{|FzElrT>SyAH0u$$CF@B!v45$$o7cJNvz<`iu-Zb z6dB3dPCvBwR9S|Qzv8YcU#tEbN3dXNjD{F7YD}`|WaqyNRZqSjgml#&5Hzj{0yx7z z@^ms(0Z1@Y3}Tz4BS1XW1p#2eMT&NlSw$}@D>jqer5kOrzckiH;9q(2IT~$Ey{a$0 zrw0gBw|U?_CwN#%0{`p1Q-i?z(DKv1Z*Cw=jJ`qQqwkv-GuU8|>IF1vmIblsL~)He zqkl6P0;6FUX^c3;ARpBOg9MrfE?alPC-fMpqD(rtOII4k(FuZBt>{hPB|rLwJoLMw z!vg2Mi`N|%u;AUT!+Yya-6n~W#)J#2xl1Rd*X+(<6mF-ENHwzhDG*#`SJhYi(_H46 z5U}hB{V=7Fc)38<&aloA`R>3j$GWUswl%oapHQ4EZU6S&_WwLv`->1E zR|e1S@`KDiEvF;;?W8&O{6O>GYio^SMg{h?nBt@ zGPyaw-VMv6fB$>>%`NSJOZ(r_{#trdZ5zj;@c;T0D0BTY*-*44+ewdF z_Z-ER6W!Xf9!XBp$MvNl5~6Sn61)JYm-h6#-+9@W*~J1RB|GUkp?_@z0=u)bv$M0a zv$HekOf?(FlvMoP>B+Gh8LHfa-sG^wxW2@cwAgh%Pn?bN0?@$x!WdoV?b#3X5hDQr zA)@`Gmu$R?$7yar4z^Cu;LHB&w?_vKwnVy((+QJVa7nO z;{%4-ajIwR z&mf9^y2O;TY4s3x-7~x}vzS3QvRmY`BA(z!sFKV^CO*H4cpC 
z0T@HUPq1?YFe))I@DFaFG2&RC+oM#*D$y`S35#)i$;p*Dz6`VsJXjzyDl>Rx^E|`S zEv&ex@>3=*7&_?I>&56(im8*6s|4H67n4ghG-$A}NRDqoH%>?W2=B_2l-D>~%*Qm8 zNlH$0m`Qw=TvOn*Brb}0HbFSa4a`oYF@@XeqaAs4iQfQ|^a?-V+@zGvK)RSfQ1mEd z2oNy0z$D1HUqy!%Vlpq13&39!!zD$@CMg&aASX>&V7hEE#%8opUGa+B^Y~crrZiNBa z{=vo$m_9q5Ju%PB>)r6{C*jvm!>^x(Uw;#R{cZU5AHuKyE&LkhQTTmBhjw(2SfbeX z4PDx4=+jO^r*^&#!{62Tuu!w_4L&H=#QSdpQr~%=bxd-*TCv2G>J2H4mq4%S4o|LnP75q$Rq!qBB19<`xC%w2e zhg&Kz&?eL6X8y#D^sth;R|$GXg-9!sv5952pv$vB zsrV{RGZLB9i^0~sxE^wcU}dCn_y;fwLdQ|C{LtsOyYlTWzLo95$DQb({U$KyK``eb zFuMqb7G}DAJc~0}Ml&3+i^XiDW8mRRg93dqOJR^GO8}ib@^xiW5IAt3ic^_Vym%Ex zABa7Qls&0jTwJ6#7^H}PUeg+nCRh;wki$I@I2n|yv)6(D5-|9&jXyvK?P`!RCLO)_ z+Rv8YTyHh$1|k(1ur=4fK<7oMhfTwO=dk*IYKaPFzx#m--2nUCcXKiX+}>69ts`@v z`zNyUJ;db`RDiVNH5Zxh$i(2XN)U8TrvZgE9{?&S<^&R_CFwiF_a#gn()W^^cvNM# zq`I>jldy4bu%(X?_U#>VGYj+#JWppvhCriM-Q;h2(~!cn!E=xz&oX2$tyD5A@udL~ zp44(f7e>CfM|&bv+n8>|r?XpxI06zgn3C_Xvli>(X*BdGdP$lssiiChK9=JGb%~FC zRxPKr8G{D+0ST7i`R|;>n=%)R`)HPYsA@m z=;a61O{qU=NesOsX^Rx@rh{bAxBHLY9eYL5bX{JMdmN7QCUDU`P>{pO(-*}G>huy- z;TMYn?XB{ZS?dC-S82W|#r2;aa31o8trc;#K50SFwyh64yBr>KSctdb z_cko>h@7G;ulKez!wqWzng}9LY>gpaVw5`mrj~pT^ju>z5H9&BjfIJUX{6e)X+mrh zAj7c<=s%()iYWNB#nlRhY!G`Y1r-bgXfs)I1@z=Wr(n^@R(PXHDH6EdMSa9hVt@q_ zy{Myfc#LZ(%qnzKD$~g<3C}INs7jPo$@^kRqqR1E*y?iJ9NE1#ZrW|ZE^_)*>(dO+ z%kA-1q5Z-15_g?Egq`tqOh;Qv+AA>LN`PFXw7e1*3|xBf4n_01cZ17B)S3lcoqj7J z@w_QSr4SUV#_xX*eT=7NBo5!cieA!cKSmqOEuHvL;vjTZa#oEYu<+1`VU!rK*H1)T zl&_Sw1u6rJdysy!RB3M@C8qNVSFm`7dhO^k8GY&lZPAVg%_A%UDF%9T*mev_3IChx z=rL&tVrFzHcK}5~CY5NC=VLNN%#uvN;&mCVd-38X&C0P2)qCPAv_CLSua#QmWW#a@jO{xwl;Wst z4%+=pjer*xRt?K@QRBS}?ZZ}3N5#WeRT z+oqt`>*@N41vt%*=0n&dbJ3_-`_@4LRZ61fXe_~^k;J}NNYnwjrTp%A@x9d* zJ5gH*-v&U78FI|bkb(9e>bbY&1%)=lt)8|*<0>_U=g$KRd>d6^$)D4fH@Bu*Gy-P4 z-&{gPziFirv^5AzLS7_yg03`(37-)2tQTD>{7I zc^rLz&f2!pyvt;SyYtgm$6rUNJN3PKFmc&+G|wyEJ?;G4?qKJeKMbBe9_$XB(j&6g z0f~*>PI4mb7d2(;mC$^(&-Cj?^U2`w`0A z677CQ5*3<64W(roPq$Czi|SJyaaqLcCG}}XxurEU4btT(|4xHAy^PU9=gH1mdLA;& zd#o=6><3R9_Ev`#G;?o-SY3FpF17y^g}6ssO-;GfcC5X~AMa)oy4i?a4wLX2HddtP z=sjHBuru_ z&#}(ADDD{ib5U|UVo+t&O$L)eU%|=p2_;iAFTqiR7%MfSXG%g8Q62nH0B z0~cE{%iNe7*>26}g(h6|SNgD3-PvkAajp4DAiebQZW%a5Wsim~6zlCpZ)b@S% z%x6H_fhfPL%i~d2PKRskt7l6`(HlNe%W`$SMVr|HLn$W7gA`CO@UcwolxTmZu1;N~ z&={Ji^=Xhj3?f}GLPu3{*y9UeW1zA0Q!rQPtP|e-PaKpD`PVpfAlmVt-9GiO<@A8e zV@PW`0Ka1^^zL8{s~Jy~J=LeFJJQrI&x+QL!b;}O>x2xNFu!OBiP*|Zl_x*D5nk6v z&I6?FJ&N>z`30IX`EWy6rqgP$)Ycg4NrUy2j@ZowP8O1XG?Ij;sWvLmfelJZl3t`2 zdC=6M8QOc}(WRP%*Ov+Xz0DWKoJlGc8S(Deod$C)nrdCK$Bv%r7*LBWt!lAM>()m@ z`GYYm8nJ7{7&-=vg`V5fo*7)CARkM4km!(#8#5MTV77pAs zBD(7-3sjy|JM~Yy>eH_GNx3=I(pdUt+S<Y9ePA#R##g_&7g~$556eZz^ z4k~q82Wqp7uF|+BcGO8d`tpraToVg3D{DRNQ7&8%IVci+ghmRSp+)d@cgH}`> zjQ~f6+{k?^q)BMvZ?EYXPfvsTZpm{Kl>b65gEbR@np}b}Tw5_7&-`tx)oj8`WW8G0 z=P-c$a{oj5kCLI|C}~R&P?)atlzWYM0v#{o62l*KqXG?(8d)^rp`!V>Hp5XN50{>| zjYhMpqi<|%uFLh-vFWtsj{6Chd}`Xc&IORs`T zkR!O5WYW0GjXJ|CKG)|)vzo0`653-Kc`Jd{4!YdlIB5N8b&O^^p41E-CY!>$vhZ$m z;jmbrKbLnA3E}szDeu)UZ}H;3cKXURMRIs^AO5%YZxjDx6aQlq|6>#X zV`=;k+Tnbe{7<_(_59CIHt|1xwfMik#n8WAv=6-X`$8XpU8ZAp=}%$ni|LOqi?~b^ z>Yqr0q&3EY$tQI3ifjz8ZQ9mgn%b!o?shRxiZ0h=r=Z@z)S$AiwK8O#SSi$6M#hj} zb6ktc%i<>brM7f%>`UL)q3p6yp*@NkHwp?UKWy@jZuGy6{hQ$(*=5hs9M?~Pkl7`7Sv~TICZLDL-$WkkMEO(Li@5Y;O`dY2~K7=>iC+0xP6sX z<*#pWGH`sSP5fzlfsq|I$!+_C_eoPB@CnIyH5*42%YCUXlAXui5 zw?{WVUz8+;SBq^dLe=9Jk$b=ypPFNX9jkg^no@iEb-kkfh+`UB!O6k@_wL~IEEJ}hp*3{Lf~q@Cw;^p3vwjzs$*+TcRrqN2Ryeots_6Y%lJ!zDS4?lqCF0h$s= zd-cxQZUxIwtM?cu8JWDnSw2!i1p%{NmX5HjPmIe55=C#kvKiuKRLs7vBv2JeVEE1*1M0*n%pfflPr7QDK9&H4fVPD z+ZO2kD|x;)9_!F^QI=dKc$}M7gXkv;Ex`df$9jF|AP{|ZL0v!rF*71B&#Jg3_bY&k 
ze&J9(p(vLeo~($;g#i9D1I=Z!aA2VO9EvQ=xs(;KAHP*G_DvZ89=M~V*DA!DXlYjg zI@)YfPxhJ#O?~yt3Qz)R#`* zS7>aK8}(ryFI2#Xvg?=JTb`JPtYSW(nmMi$XBjQZnDk@4ju@b}C*~D)Wn=%@*nc+m zpUuC24f~Ig<##dxEwTSRdHnQS-~RJ#XOsVPWB>V=u>UwGgMG64^dsXW`}r(0M!j+@ zGXd+%1ymT4CPhy|o$A{tgyAfjx@rWvw|)*4L72J-K6_=TG5YMV)x2tzSN#S^Z|26GA*`T3x7Jq^3px(3UKMy1 z59%(TS6nbKYxo1t8h-v9COr-7z9v0`Q*bQXhr^{&r|%I>wZBmaH|PJG{oiK)xB2(m z-2W}^e)zw9|F`pO_nDvnZTFjv|L3o^|NE_M{TlaW^^GBhN^`e`+EtXx&S#(smb&=y z(aVEZr&R_En4!}Pm@nkl*&g8b?Ej{1SvIx>klm59gZLzo|v z#N>|l^xi0}`10wFk8H@VRBw80OYYvLnp8C%dNx)Xw5_yy%3lCz6lt!DcN~p2Z9)3# zG%EdVJCk<@pgzAl`0wS%y3zkO`rk(X+vtBw^uIDGFrMKTvH$Oa(&yX%pMJZu(f>C3 z-@l6f$3}nuB*17JM$?07@)D-WX;MjRV@Tt}*`$K0Ve+T+=6S~^5gwTN)=@eYXi4}R zDUBq-ids!#Z!rA$oXTwZj$^XGKmNI8gFjKbXu_w5tu5nHgniJ6s#seBV~djTuCv9` zkyQju(*w>XtBduvjECORZoOA^KIs1W)LuqvO-!8@5CQ9V(j(RFkS@`eqw01{0MU<{ zh^}(1du<5kRuA-kF%Ed%DGGP6(y#=bw(@3 zr1UAd?Q;f8^kmCm#&sPWc@UvX)3VzWdj2BdB)J%C9I2u;Itm@9cW^-*2AnZtj2mTKfOfZTQc(QS@D0z>*94eNuqR ze?e(3(I2K75B?`@1n5p`tkMt5fGOA>T?-F8KZdW{IjoBu)!CXX*TZmB20~#BVPfEFwn^!VsK`H z8dL6tSso=97sy#D@(3`(RjPugBirfR%jdToeRMJ@k_o)Qs94AuIzr$y{*(ZZQVMsB zluihn*vS;x8(rj~aJRahtLUl#6zU=NB|GhN-73nG7z5j0=Q6;cOj(M-2I2QO1=K0T zw~pCLxt_}20E2@!>|C%!pTf63|K^(~Pvi!M5=A|y5@JV(L;IX6tg;bFV1_mCGSI%B z0|Q(D0}L$#kf9yfRGHY7bsba<| zGYIEXUA~-4q!lI$BK6 zVY&mxFOJ{wCmySKs*CJT^93f|y@2UE%?k|D4NsH!{I*hs@se7-G`%I6M>*qDS>q;| z4d@6d!je)(1DuJDz&^q z`g-!oJpy#vvbF%IXQUT>a(z7Ml@9_C3|?McA=2e&2$boB1zHLH&EJjDbS6siBEEl6rQ_X|go+BFPJo!5%0 zQg@vetMhm^hn{KQa3A9yD5koy+o&WmG+7gB;iF{cviFJfYwVj ztxJr*h84yM%9CU^O3?!c-*9MdPPc=4I&eE+4Z1$SRcmE=IV`JN%Rr|p?l|Qdxg_ygVqo$aM}SK07P*ULN{t8Kx?ZOl z-38OHX%Yh=2%0k=vC(2Nn;|2>d;$Uy*QI!xhYjK!n}N8djU7-=WyQcLq378pl*{b4 zQZ6Elhiu#og$2~-RtX%I$Mpqott&h{Dh(oI#lUPF6SYa@8i0bLi{3^*G%_q14XMyW zKFaVUoc=0B0>8kl8jBff7KF${pYt9f2{oPn$n)(Q!)Egpcvt?wxE8l0*>ppQq!I?= zqg?@=!BV(De<_tPr%D5Qf!xhtsUX?H!Vm<+(b>sgkKZ01ooS1N&Ah1Md!UtIcH0xo zXylQ@h<%o>>h!CmYcT+f`<&5re(nPEjKZe0O+73HI-%x7Q)t_o!m7`5H@^^Mc{G}i z4cieRN@NxrH^ZS8<+G=4MHL`1!Ma$?Myxob)h*{ZBD_LPB1LSo$A>SY9oUOh?ox@R z>jSI9IedmueeveyFE^Y~mlh!@vm1p63G5tADMMi?CyO`Mmy)S8k@ODzG7$dmbzo0!r zD)tQ@jWXWwUFg&eB|IH?rBYgtQ-rORwt842+Gd4qDq&Z@h7vdt8!KNmQqwZ6#(oWp zP7~MslC~Xh;B_7;@4GjwtzY$UHh{9D>3zTv@X-RwH-Hf%>V2_eqHQw#q1i5@rs7>p zS6uae)+-OjN(L-<*cxH!0-j0Bt=$0JPWDFdwn(r6`?;YBkF5gcv|5vHm0+`Ok(HX( zJR8ISYA&xTJ)2rlR^<)mJIx+aYr%Qk&`~?zRw1t`9i*)J+Q^Sa5UjTRhl~(5ALRjorm4 zF6-XWE-pc=X&U#IU{J@rH^N-LKOTc9FONYv?dqd`Z7$!B=I|P|?m78ZMB`h)5&j+? 
zqf0N=OG=eUIpti+F`9x$N$3Px1bRjNa51Z%!)^e(_q+qUFpLub8UiSY(j%C43V~!z z3zd4KO$cdY#;WydL0S_1wXc=hXh_UK<`@ec2Y%XelGU>s3%gTTWD2WHV{c`YSnN5c z1{ZPQWOP`Atftec3HgwY<*~fQ$ChnW#OAHhAuzBU?u&*vu5Eldw}BySR!+m37IjyN zs1wJ`6#*|$On?r2-KwF4qv$In3oYVAL7ILBwi#vok*5>xqq z@4xuVshPz1%`_^zqd7>9{ybYcfd#+Av=G0<#QiRVIgP$l$uehko^ew(1;pfnN&13J z{%9~Dc&7+YolRMb!vys5nd0WSI8Uo0E^awRD?z38WuPLaUDa3O zYSpNj75szF%VddvkR@S*TM&R~STYT_=?PAZrty~bEZf@JmMdjRnryZZai!A+Q6ba@ zzGt7M2jlO*v+hb%q23k4sCFJ0DrXbg%$M?Zhip>S5IPUP*lLTmty>91*=~ewG(LXmc!-^&c?0Be)f2D=;+kMWLZf zS+UC!O^Ea%!8k9)8{?m7Z@PuIeX$xlSonZ~xM+54K*lVxZK&u%1VI0AadF`UF%j!| z6EbQsZpDSmFe?1)VIqNq5ix6i10GhtXv4(HaRQ-#x!8E1g7`>vrU4_ZzO><^#Vl4= z&n_+wkRT>jOCmg$=(GOx#RP5MD;r|(yCNM=Tp7wAj?;P49C6Q6t7?ht9?K7SzBL=& zQtU+cLtd76;6xt=D)C*j2nRVWMK-?Ju`hO98O|MRlHB7Yi*Gw(Hw&`VIP$X^^zz{K z{$G1G=p_{+Df_rV06(_b{aB*_5Xe;jAmzq0Esp%1CXDKxwH&tk% z=1z#3JF8H0r%uhCRj9e+QPWriY1)0Icy*E|CGXw2YwTjel=r^vsM$J6DxAN1x*BcB zyIn3wg$7YmY%dDzWeJ&D-~hh_Vz6xWU^#)xH$B*u;#FEC*KwA~MS;JZqCMYTE8ZR- z9I0{d;2jj!Y+t;7dwTGa-@;m7CS!j8-Tvv}3sst&*2tWxJdDZhBh6=S1BXX1-o80J z`krLJ@&IFCZ{MAL|5km;7u6)EFAWO|A4N#}pE@O`zxFjJ;s7jEmMa@-;Xr>;5YdyN zubfrLdn}#1b|4R)ru;uTShExDDF__{0^bREopg?G()m9dH2s-g$fM}ux*mFqB4Kd& zS~a2#TmOq|JN4~(jv>;P4n079G5K?B&omb%H#HJ?I5|Skni8@pZw+J+nM zPAHMLOx_>#qRVtNZhvf=MyDZ+H3zQ)8IqKvBBdE(sZc^+g#V3GN=E_It@VWc=rS&m zy}JtdLvyO|(2tf)haX(bJA3dgleTziAosS!DWbyFtD&&eUmNUCnhHl&n)whqD9bua z>WkdnwfcC!v(wx)w;3m_jU_q;wXwe%3h9yYU#%335ed~ChaA5e0m?;%5CyGa*l;e> zP?`=@tiGm49x)GS`3QivQ>I@A7ksCqv-tFoMZ(8NAxbrFKm%i9juXD38zkhAt!Lg= z+Yq6k9pDTW8#N)ERt7CAwZKLa)^rLISXx0z=B|DMlP4ES9`M z2}=#^yQqGDw<=B)UYnkvV5(Vkp>Q+4g_8n>Z~S(WMwL>CddHvjL_XoUBqpu|Obqc12h zm}=Z41}WOfumS+hDuWwRixl3ox9?75FfDTcW5;E$O#)WqUq<>l8^gdD-1w<~)epo3G(`u^ESi?n1TSqm2|MSL zS?RLv@&3vFn^R_7O?wr2%@c+uMqsBCh>>^IM^AqKEF40 zg@J?K4K7n&MZE;RVV(nFY#{N|fWtH@{^M8t!zJJnd!i$WfjOMt!oWcec}oPf>#Eo4 zr6-JFK~FPb25a^;YxFcz+ahgWg<|bq4cIy8JN^OCP_i-5q+8O2SxCxsRoAoY1Y?0o zLIvH!xEQ?VhvlZkajPkD++;)?FLNIfB^VXgi$rzDvA~Vnsm;_vM_hUkbUcRtT<66& z;Cwqyph&^D2smkU0h)C_&2H80bTk#sRsF%%7T&-`TV$N^?K@qFY)HCXKT2k!;&zUA z*44=XqQOxIw$wL(%eN3!ytVx|-ID$N@P1x$krxaU$=wadn-UCA_vbw}$TU0iN*1}F z*Q@S&o5QQtWbswNX*IwozDp+$=Yb|5oou^<+UN|8OMycl7s{$svQ{EqTJX<3NvHVX z^HR=~k?Oui)}iSpmgaxQf1CKfoA|#b{_oR&d$#lJ+fDr6P5fUQ|94TP**yaQFUkM$ z_}Mp{|NGgu-#mNz4aNWcc9Z}6*NgxATM7M3KEIc7k&K5JRj_NeE6Tc$zXvAffjKsi z!5neNO~r(xTDccAf;(pK&HBGt|2OgfH~;?C*Z({z%Xs`{*ZJ`2W8`Xn$RxtkMcYm`}F0hCd%4onnHnhm(iVUgUer_oIi&LwyP$>^kti z!uJN!;PmY9?8Og9`gG4k4gl?>N=r5xEj`erOBn1RhJ^7QC$NBXcVmcVD7H<_6|1KvVUYutA@%E6<6WfrtvJjpp0;Qj9Mn*f^}8g+T+8-oWKx@gBzq! z_u-?-9!uM5&$RHcFBt4OaYyg{;}4!^wi)&P@gZPDck6mOdT=tYqQluZxj86`yzrFL zrZ5O(<_p|M0$NtNaH`=RQ1ZH}y2ZF0v+6m=N&|}JV<`8$vzVpmi_z(A6pD@gZ=?Tj z^#9Gj|8DyK#l5xvrTYKVZ=O8%_5UZkk2m`NuVw$+=>I|eUp?dO7&I&Y@z`YO$~j5a z*4EkEG##=5r3-?X+2unZO0aa>i=}sZn@87<< zdg5#o5D{-+W$i^^g-BDI@gbRfcX7qKs!tKXi&#}f}l2l!C;qB-`E{3)czuz`dv$oT* zUq@nItB4=B>4@HOXR!&9kDmDe&<8x-jk4G9$S)eG@u!D)4uiaYxcdZQbnw0>_W zBCYI*j!{crx~&aRQKtcV6OKrz>Ndu@GbPsBBK3h_!*M7`&QO1#zne8}qyKI6zoq)$ zA9gnS-$ws4`rmwV@6(@U`ror}p6rWy`MSDyC`A1Z~5w-QUN> z1f=TkfB)%P2Hb-=O9A(&Za?bCsf{5XMSK@kBc9!M-;0nJH+@Emx_)CV){;ISdNNT+ z5o2QKra8~hM06i)MNdwr8*RP#Y5(Z#c>e?jE!+H?|M3fmuP7DA(+JAF! 
zdc6PQ;FJKr|KP_-aahR47O4|O?`}0NUQ8?fTCp7C;Y3h?VeN_CyuMd%E_9d0NSrBf zhw!CX1RQ1zV&1SNC*b$ofJa{ks3JOq(Z6(OaL&t3chf{wQ5}{Sn7*-oz!QRCeSdItc=d#mAIFsr;b6}A{HRSADtcX6 zjq^oS3-yDa&A}f`p6jpm>FfF8!rbxC0l`x~R-@B18#d@-z#+7t^kAIeIa>E2o%TKK zd0}^;L_eZK!5(3N4fJZL`^ow+X<1$-T|;q!iy^u=-P38F`q6?yzq$F5$&s4_e62Pp ze3p2hVWD!GkRYmFzJ1fR-8MYxOmci-;_734Re=*$DG8F+=Bjn;<`+#MK-&o~0U7`l zXsWA^*6Z`bv`z|$KUt`^G$ba65%HHiv89@|;gmna$F@FC-vzRJ2oDgpM3x{X=X&lm z#y>!6MFEAx$|EEAj5<+9R1%@@`i+OjXLJD+uJ$?fW$VR(#& zgyCoQN`Ez>&G{KW!=_`d^(MRP)qFk zqv^O$9v5;(aF%rVubTuABhA8}EurjWS1tKG0m=hg+eQ&fimPe zKx3a4Mdm0By!%qM+qS|`5&8)Kl@TV%{RjW7!ZqovT$0T%Wi(pzxnsrw6)gymvB82g zX0m{SHzaQunBM5NNzYL2GK@K&JX>Zp*X&D2#qq& z($gwbO>Ra@sG2ZBZBzvyHdlhpMs0B2YRKZJ@a*duWg}GzfU1y_wiRCt)FkQF7t>X$ zJHq5<y>MW?XG9$ zT*x<_TS|;;z|5peU&UEERs_doC1f^i9!4AaSBXc}_@DuB!(a^yqHX8y@_b8hSz?{Dx=jbCe;XRC9t zOtG8l44k0EY%EEgYCLub#b3e^ftMBSk*Db`sPr~ zPb%!9Ag>pA3Fz&Yuz&gzKt2#7#b~Y3bJ+~5hKn!srVLx?-Dx{#YZ{iXF3d`ep@H2{ zrv5S=MX!(ka`;kZ(|mdGYX9Bqv*CBRciY|(Z(i@8d1a1YpQu!uEz6s^k&x*GI56Ki zP`JO=(dv!}H!R?(nv|Y-v$jT3IOJJ!jeiec1{B)xsw&foDE^iwRG1+|p&c?(VhStz zj02|%Iuc*7Fn{hJdb#>s5ny#JF5VL%$O9SO12x=tJ$Wpq#v*gms0^p^XqY8&F++>z zbDOV&zg}hWY*@vU34isLewZ9rX`Bu7PxWt@8fgey|0+9#yj^p!zl4kn>#|yXRB$h9* z_ZmT1M;-h^Hz}xaq3&>R}07T}TjUT@3`qx7$hH(s}ZGNegNz@iYN` zE4!}+;v1HS#=PnaN{23WF{)dx*VB!S(-Kn~zS)2AS}!Rr$0Qn_IQ#wa?Co){d3AC! zMkOql6j&eQ@W+mg;x*9?FitA3K7^%r`le^B{uhj^*hNS90kV>}h4?2r9u&WWio z10{bCPG4(Ad+#_DpV6wy@ z1b7>4z~EU~oL$GaCACx;ry1>Y^cAPJIZt3KHCB!+6SRn+hi(O86UCUr0q%M$MG7f3E4t9MpZTBWZm zbjC%hCi_=MtycAmASlFoz&@kCwqR-Nuf;32niz6^w>GD=Rg;|ZRrGrQ2xHaHk??Be zP4TWkkwlj;5i?H1P8ukbWK-aIlI7Qfu!seQ??_V?ZCUpF_}}k(xIUQs5Cdpyx~O8C zwpT10{avvcJ-=4+eP5V8&=BUmB2PfqI$~gCabzgH89&%nC&{4ol>OmWK5@n20{MzZuV)GU(b9y;p#3s zqjc8HSR~xAi zo9$U|rn5W;ly42}vW+rr*lDVeR4ZYPyd{iae>BQrY)2JiWp86v*SR(6mYuCNc}k8# z6KhGoH+fdIi&wLkN=2Eo-6GnH{v7SP9t#dbG!Rrr;uDfK*aInYSG8&=IY$FsQ*V$# zyd&ly-mMeGm4UrqNY85T)moBPb`?_yi=S4G;6X+p`|WvLrX%{ub7l}lXSefo6ld8j z9jj`S1RpU~DQ3FGyT&;TOhs45sa}HKnTydSn(9BE#Mj3!KK7%JKPR&v-@pTm^T(Iu zZT=#k;T7+s_&7kO*)Jw)sFe>-1^Oah2j^o^;t;$k&J#W2d(=~PGj}mdfhZ&}Qas)* zVT+X|&^*)Xqq-=}`=}Xhpi~2t!}s#G#C{@jS7}i#;BgvbQU;jASQa{ceVL9fEdsEQ z%NN-g+nyrz#|+x>*6Y3{(Gq9xp{Wn5t&g(EM}0q@@>3f$r~%WaB>j0xDS!=?{g)OVPv%19F&lMjPrO3OT>l*X4i6j_w;#5b*JxQmF>p#iC1vgxQq3>GuI zl9OJfcy0o$kzcdPNcW-0M`#>U@AM5VzD$ME8oo_t_97!PNm^0ncRZdVM$i^G#SV4> z_<}sK8Mu269Ic*d`DvI=L2!jhh~FdrPBun6hq64ztNmz=EEY3Pj%|na)Re={Q~h@* zM*|jILFXv*LnnRseNUYW? zx*_|1F<#2|75rjagBW-*GK@Zhmc%=#hX#=F3!sy(&c;J$BwF({pUnx?|_ljDgEd#=JX*REW16ZDu!g|ntelKg;^LzMMw}J_JSl4@F*MyaN zWs9%bJ6o}-m$t+jz4gI1fmBOT{WKge{}H!}{KIWbTecpCJ4Ch`gzm&w(b?OVZ}%|J z*GOt)q$7fc#Y!=VMAXGf40i-d4mw()W|(LA?lwzQe1)=kyll&TnBD2kfNOAa<(?R- zaatotsi&_6(!Q%kC=E2RAk2s^!M2H~z96ES6D^pQi2JsXHlvi47ns7)19kPw>lW5? zYP9zO;55+M0pem*H|MNi@ljVeR4->^ua{bFH7elMsAX=c!tH%Sq*O_Rm)}gJOBnKB zjXcZ(nzOFmfL@?R`+@eIjwuZZS39LdaaTK}RLl8n+~!^D+#w0xkxXt<@*SZLqBedr zFkAYY08f1IFO&Gb0_qdxap-pTI%vRr>~hi(V`cVWp$kK*Oy@zl_n@@qWN{E;LpD3= zkQ1B-d7C0a#j{D$ecX?B9zX6iHk4jGNP|tKJ?Acpu7zF4d^eJWQV%~blK7M1NK>t; z3ajn3uWHx_Y4+wzX4bn|gXZ0@y*;tv)!L zKs{ffZFsDZ`ZPm>_lZnlg`J+upT#-8%~Ffx?(~W3)-p@hQnDwso;?MP6mBR$1k`X` zpreEgPWFGQL2{KxX`kTGQLTNVR-se$*Y@hi|KFH;SLvH1&a-sXdMt{es4*CjI!n*^8iU?q6;+hU_N=`C$_Vyj#ORVSDTNGQ zhY}d7Zrg%TZN1z-+po8TF?H5%V*&DX8*6IN2e0@48W`4blEt@cL#J8_@;(Tk?_M9& z@s5G7K#^LXX}BJzXQYs>fc^2w!HfN~gO?Gqp$&R45cf&;_M*pwrzo@D z!MeIPzulvHdmlxd*K`7H4){>a-YFq#_F7WESzO-#_KZMxK4Dy2F3yBIr~9_{b(x-! 
z`2=a%YWDNN(Hg6nSQCVmx*%0_N0`84Jp0{!_KjrAZ#{RRH?!4=i|7#{P_fpgIim}rMcis{r>dF*ZW8O z)~eGF6a0*rNd-@5i#ON|PR|a{Ui<*1oZRt;c#v?IrJeV}qsn0K z?HS4m^*zi{9_8g;^zcFX5Ll2K);~pi`V}<8kKw%Lxe6|+yAayA-20zTi@u?Nh&p%~ zmzUwbpVd24#^qg+DBZp{Wto>lSZJ%a zy$PWbjITZkGbmpz#`TGq<+F+NWui&8$I8N1LKg(wajJ44&UEpj7O z=>nW2?_s}H$EB2ASB=|wbodf|7#Y_*bKH?lrU3dtD=399HD_#;v0TI`2Fv>U4&o4ufYqZMc7 zT4Gv)o)T3of3L-bNos_{4W%W0nCzKuKI}(M0svZ=dzM;qK0D4XJ$a7;nsAODb=5R^ z|M){)lJSw6JNL4471k8hoDgZmWQNXnnq2_dIJT$=`~a4O(baC26miBprJ$ixbCHk_b3WT1Cls?Av!E>|m(fROZhbT^DJr{3MUK^|Bcj)^ z1C|sng(YSI(YeviZ=;2AZT_g)HUbf8evX%>GZ%EZ-6NU5?Fv z*?+?!Gbb^n26C1e2drkw(R`udzKL(r>0%lkC&f0Jn4=fR@1k;^0C523o6EnqLsKbw z+I6JXC8W(CeaT+OKN1%aybXJx5$h@0(hudWMXqoFML7F5G^_nx4%r1$aeX(PYOlP@Fj*ZTu!OLeny{A+Vfo_9 zrR-(0hj=c7$?kEL-#b?>y}$HF>H}RdDh-R-C0VPJ7vr*~P+yFf^zMS>JXG~`XyOLiU3?5#V|33WhlAbQs63)uN6C}s3Q&@Nf?1qM7B&=*uX;l_Xh`PY)>@17*Wf(*3*{f#j z*-KKoLa%z#GwYc`Rq}>>`~kcoldY8bTdfsrhc20U^dk7$R1&k0`4M z+8*kbk+*n9n+~*!$%5<>Y9Ef9&Md{aTsw!*>fW%P!>6~ z9P}f_T(HZ+GbTL0GdXHOc9Zjxx=zmFA6-1ziD#PCfE~`Zr^z%gZV4Q2`OvK=f%UH) zTH{`gGw2yFHf+=JV^*nU-Rr$Lx!B`r9v5ZOl?iZ*bG>lWvJC*m%>cZBbo|#R-c1?- z51C)hW7^Mlp*{emjJQ%EsNKd1!+_dOhG%`dLuPsMHdtn($4iX3t63%nfWy31ach#j z@tR;U(n9soWKX#`^4|a|j-Y+cVip#1V-5hg_4#V9DlVGaXNrATFadJb5 zV?4>Y!b?YFVuSwkp59obQWdzt6@I!`3MX;;Ef!63f5h~g%P#KrRinM87~Sjl0d=g; z--nja(S6luun)YOp?J2dc0~mv;cXBt*EFSJRl?9%_vw)@M@l6}1d{MBY`jpyr0-}B z!7ghPZN0PD#dCcOW`eq?@~d>unMzzwUelVX6AhD@PwbJ|Fo0%YG|ZC53xB)9FfgDX z&L9q*I0&X5+Y~f_y{(!HfI^6LWpVG*0T9CcHchfI7YTBZa{9Q)eUhGFxR!)sLbGR& z@ehfpxXOt>jc4gag1*GK4s-T&L(#R?oodgtuw|T&BDuYe`-qoyJKbukJVZRaSW8s&st`49U5^=?Z0BrU{;Q#HiW(GwQyZ10Cc8 znnVPJ+GP9jlL|JF=yL~m7EotqRt2Q6J8B7|FC5=TjXHN$p>Ki_v(ZwYI+;%5v8B|% zuD@0q|HXhE+*j^?PuCk|d5P=S#-nrN|Gn}5-uQoS{(UC@Z@rw~!wYvQBuQ=H{EQWC0{>TsVyjC=#0d7i3l9VC{y%jbWY$<#ZF&sW~cgL z3!b%zc#gt$W#Wg~>ariUqpb6X(pN@yNuSmh?Wc?k#1BmMg7UM{@fQaC6p_AdOl_13!r1Ze1_Vc7J5OLNt`f}c zs{FZ1*t9NYDEZKTkOG|2#%G)-CDwo)97Z_?5}jil=}OQ>jSM5}yKn|lWBkHLN1Aa@ zuST!)NxFs?dAu<8cj2i7C{)04-a*+ly!f%Vbvo0cI{cK}-iN60 z&d^b}BfaI}jiSu_6TbJS@?y{8(LRY)ed8+Nc0hf_X@m2Cg22sP3w526zB97AsAL?Z z#ite;2ZHY!N~>>}rOgda<%9+bEkco#6@ExIv-;Rc89c{LrujvZ1O+pd_7C5X zFRNX((?Oc(L@>Cn8$fM~`^<|`$-7>J$$5`eo^VY6nvPt-sZu!TXoOuMI-%!RaVD2x z?aU5)O!670D7JAG57R1{(y4XLSRjMlGc<+(!(jbx!a(u(b5)?mr<+dLwCAiy>O%!J zqtu8ysMA6E1%>egZdr_0#!y|wXv9!&BIH(aP|mXyehoUk_uD%kyxzRm4(%&-XIXEi z2|A_RYb33o6GsO%GZ=P6oIxovapW-TYouQ{S`m#SQw(EYN$SGsl%&~ z6`Sq(lV8qxWuE(S7sIx$99wSpb2`9rDaY*%$bsk2-F1E67*xR7t;N@k>%f1vmiWJ! z;?fIlP77XM;!whKw-GRh zJ#_1$CG=6cy7Rr~*Z^;g25s-f2az*(^=MtddbZ^MZKXa8aH-~pt@p3r8_G+ zdS&FK^ITsNU&R|s#}>ev1ByvH?k=mYCBwjogPtqvq$3qU?mO1y-fFIQv-aME{NuDC zYlYsJ>TC68=Jux13C~6tb-KjC)vE&;cf5`oug6JvF}8kI2#ng%;K(&gfp-(z;P%@T zrPUIOzh6P)>8%j^Wd$8)Juoe*NH6*zeaO&{C!LuKPbjjdVHHAeybKj=Tygd!gPOmj zhOO2^3QBTWO|x)dl1^vqPvo}xXL$Xms`&GtV7)9yfZ_8_kzaTIe82+YPmiig_%{CY z1AH9+8U9l=RY9H=6~;>|P@2!RDkw23uh;iK(zg6S12(CAQAAcK6=_8<6p`>Z{wY}@&DWS|84yLHvj&I`Ttp$`g;U{ZTJ7% zdGh4hQ{VsZ>EmxU{(rxc|KGFUn(H6zof4FROB?{3g0}`d{#Nwk(_vMW(O!kx7YQkN z`iXoLm-E>GRMl#bmgyiFFJu}JD*Nd1VCNC;ldO**#c~ZtaOEWLbZ$&qxyb9j#Noi! 
zY&(${q6X=E5ID(DHC{4v(lK92_SInlQCsj?lNK94d=bvBplr1URxSI1Dxh<|X~9SP zEQ6))o5@rFWUSuJk%P`smCsJI)4)8;^6PF79at{#XSeg5N1$4!B70Pe}r@G_s<2o!2;e#qw<2z+)% z28-)~!Ea8S*nGaqu*{bY3(_DoB6B+;GMA=~S%Ch1KP4bb(kfcaa5^FqbP+d?CKzT3 z#LYB1e2D>ka}y+RoMX20=u&^Gd^Y#^Y;O4sIAINkHT3Qb`3&jE3Qp>jaxs~}7FT(K z2JX8DyolVc+Hh>zu@kTC7U`Hq8n^nh3=hR|m9bF~VGEsbG`OrygU*AowG>fuS{Wk7ZQ?Rf z(2c>`15YthLi0=wFT<|LZ@>kekxMrL2a;S|q@#n`B%LKRXWqann~p*aR_bICJF@dw zS%yYQ8+gr8A#kI_K$B$ARkVZhXx!F7d97B47f|gy&7k5-6~#F({8kmfdiGRk#D@oH zoawAR{(>{A4CdKxIZ~8JW}_Cs+I-ps6uujv6rXdJR2_|RxeCUX0;iybq%f+e5Cl1A zHw>~|4dn1Ob~;4cbXqOyqqS=x8~>0uXYWFoAppx`&mLJ2BFj3?5_ky`oPq-73Fyk- z<#`r$u{4S9ln*|1w`$=X6$~AFlaAbL6+{e1RbIuWvp71})@_t>(BqeAe)SxreDrPn zANj3W6IJ-ozyXga(&)2l%gD1gFMF^!Rj&3-jnIvMnFW^|%l0P|@1lRG3-7Tsrse)X zUi=q%{p8pdwgH?J?9W0_Y)Lw!7%x4My~2<9AD}tzCzl+5GbKABcEXg?#fIW1u-i^3 z3Itx9S!BsryK}nHgF-F=*rZ>pwON6`olD&+e8`XihykhFZNl2LM(CkP(3W zna06Y#^H(PM3uBuMWpUURk4G4ygID|E+`GOCS80DT4zLS>n*Ylt<^;*O_rW{S*JC< zZK5@O6okQ>Q(fUl4Bawnddqz5sXITNPv`T}%g%vK$Z=dRXs0tgZKGQx)4WQCEWVot zGX}+|7BEMygBHqQlrLuJ1PxW#bs3cIU9{wNdZCasZ1*Xu(zx?jP|A~WPY@liO=0Qx z+q=r(MtZe<{p5qL_I2F>vdg9W1gdxEu%l`)WZY4odpOu;1NYEJhwv`z@F#5JaJ;HG zr{J8M)1J-w@8bN;*exAOV#RaM?Q{pXVN-)B$0*?r>Y|J;4DvpN6$mCk>kV*HnH ze`|-p=W&^Q^Hl%3Sj zV7L%uHv}Z}Vscx+?Q8x6mi!JKN|m2$v>m-stzgLBd`@PtgP+e1j!qBX9-T&Ct>1zU zht4f@1A-+S{gs6 zpyl)du=fh3fMI)EGQ_}5H5^^08O+$3MV4Z?;uSBn^@70AUdfhh?k%C~BA!FhuCCJ) zM|t$cz0LUv2S`!(Ka4;bVA--u9}RtbCLRTT?fyP4CLn2l|NBqZW><(l$nlKMzNoHK z$U#$f*4&9zwR`R!Q@>WEs_<;j=>wu8T{E;0GLx|^M&gRbj6xveOCj3!yyzEp#AaoX zolFoR1ipfw^iqFi#g$O|l%1A?&dK`Xf-Ps-5{}LK7`GoSMr@j*pIXL{)3w^G!?yP2 z+c)}fL!VNZ*CX6es!!Iu$8$8A}${Qj1b$~`qOA9O@6jopsXVEJZ9heQVahc!)sbM@F0y&D? zIS8W372axy(Zr;Vrj*83UgnEzj7yQa3IIiNyf1LTRdbV;f^HOOR~VF2piu#*po7aO zE{fYIE-;2RfF`AdYfnI6T;2{Y#ZTy=7A`osQ?J|AqZ zy!-5%#_I5GIe;gBc-ja6zAXpvY-hI-0DQyl>nTL~_2B%Qr!-}gE|YJdIJRq;wXcU} z@OJ;;xT}A>`To^Tmm29}41D#Z^adpKR(Kk{tLS7tdaFY82j`I1rd}ple7nl2ADT?e z{pqL7|Cz;?DXM2of>HO1O#sJs7Xbh{3_Iot1opOvkmE=cTR`g4k7M-Cg z;$ zx*oslwNThpuMt|q5FdJN)HaaTCc4RGUzV#(y*VDt3P)ZHe8;bF3FoI)9#<{b*&Osh z+9oR?^hr96=$8Na85xDzY*gzkRW(jEpFWLLn5ZZ0n7RgC9r@LdChsJ| z`0r<=qw>IAmz0Lh9Wvf$T0pn79%#Y9OSk$Yp4Ub!ynelmtC*a^4>@m1>-t4yK9BSF zMG^yMwY|N{ZH*mseZyh*NUfU;=(W&B&H=0dqqloqTe7zbXQWFwgPK?-gUt}x{gQDU zH#tX{6n2-RX=9w4__yBWq&AKi_X)e?rDNf)YXEWGr8Skk*p4+-+qQslQtyrFi1V3( zK^cxj^dvK;=6VR|%31RAhZo1pSoGgUzA6{zv!wE*1z#3%;nb~5i_1YtWwytcqq*Z7 zWuWrIDvUvygIgQsaniM%`OD21n}u%OW3=@LhauGmsKXyR;bQJU2?U)1vo>KZE&TD~ zd}dtc?T@GY*(tn|xFDkH`n6#+wuPEE*#$b!2i9QYQ9&gJAVXLo^eRd50ppaG+HQrc z+mB9PK}k1stC^x&xELPSv7*l|3jm*Jb930~tXhh=D&wN=R9u4mQBr*-^pzBTlcFKr ztm^2>vA5L>nTwOZ50$k(XE4rhp$KBf{GtcGkySAKK#^w|hN;Uhu4wbp*%>?rrD{Ak z-ZefmIgQk1!+BZmhF2l6V@T#&F~=f?)_6Gr zSbRzAQeO&)J%r#I<;*hmj-$$*o*lo*$19&B`5Flav%^qx%a4#$ixD&#y&<362jiab zE~n|Lbut*KNt|J^S1{2ijLwQEQp_Dk?~XZbIQ?A#PYi)#W9>bBP(H*1T0BYBDJMA@ z;LHgzg1wsRC{MQCo)8SLccG9}G_elfjymDT-L2(bcvCi@Yj^|rzdR2ptND;xE^XjM z{^WmI+f{n1lZ#I&2fP&CwxTDwkwJdvVoSzQ|2-1Gjer-HV$ZR3t)<*wsz;!$qUV^fJmd*@BV?S1!) 

+def _extract_hosts(li) -> list[str]:
+    hosts: list[str] = []
+    pat = re.compile(r"]\s+([^\s:]+)")
+    for item in li if isinstance(li, (list, tuple)) else [li]:
+        if isinstance(item, (tuple, list)) and len(item) >= 2:
+            hosts.append(item[1])
+            continue
+        m = pat.search(str(item))
+        if m:
+            hosts.append(m.group(1))
+    return hosts
+
+
+def wait_ssh(section: dict, timeout: int = 600) -> None:
+    t0 = time.time()
+    while time.time() - t0 < timeout:
+        if nodes_reachable(section):
+            return
+        time.sleep(10)
+    raise RuntimeError("nodes never became reachable over SSH")
+
+
+def main() -> None:
+    ap = argparse.ArgumentParser("quick_k8s_experiment")
+    ap.add_argument("--slice-name")
+    ap.add_argument("--site", choices=AGG,
default="wisconsin") + ap.add_argument("--hardware-type", default="c220g5") + ap.add_argument("--nodes", type=int, default=3) + ap.add_argument("--duration", type=int, default=2, help="hours") + ap.add_argument("--os-type", default="UBUNTU22-64-STD") + ap.add_argument("--ssh-user", required=True) + ap.add_argument("--ssh-key", help="private key file (optional, falls back to agent)") + ap.add_argument("--pod-network-cidr", default="192.168.0.0/16") + args = ap.parse_args() + + ctx = geni.util.loadContext() + slice_name = args.slice_name or f"k8s-{random.randint(100_000,999_999)}" + exp_time = datetime.datetime.now() + datetime.timedelta(hours=args.duration) + + # tiny RSpec + req = portal.context.makeRequestRSpec() + pcs = [] + for i in range(args.nodes): + n = req.RawPC(f"node{i}") + n.hardware_type = args.hardware_type + n.disk_image = f"urn:publicid:IDN+emulab.net+image+emulab-ops//" f"{args.os_type}" + n.routable_control_ip = True + pcs.append(n) + req.Link(members=pcs) + + print(f"🛠 Slice {slice_name} → {args.site}") + ctx.cf.createSlice(ctx, slice_name, exp=exp_time, desc="Quick K8s experiment") + manifest = AGG[args.site].createsliver(ctx, slice_name, req) + + geni.util.printlogininfo(manifest=manifest) + + hosts = _extract_hosts(geni.util._corelogininfo(manifest)) + cfg = { + "cloudlab": {"ssh_user": args.ssh_user, "ssh_key": args.ssh_key, "nodes": hosts}, + "pod_network_cidr": args.pod_network_cidr, + } + + print("⌛ Waiting for SSH …") + wait_ssh(cfg["cloudlab"]) + print("🚀 Bootstrapping Kubernetes …") + setup_cloudlab_cluster(cfg) + print("✅ Cluster ready!") + + +if __name__ == "__main__": + main() diff --git a/scripts/geni_lib/remote.py b/scripts/geni_lib/remote.py new file mode 100644 index 0000000..ac33b0e --- /dev/null +++ b/scripts/geni_lib/remote.py @@ -0,0 +1,76 @@ +import os +import time +from pathlib import Path + +import paramiko +from paramiko.ssh_exception import PasswordRequiredException, SSHException + + +class RemoteExecutor: + """Thin SSH helper around paramiko suitable for non-interactive commands.""" + + def __init__(self, host: str, user: str, key_path: str | None = None): + self.host = host + self.client = paramiko.SSHClient() + self.client.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + + keyfile: str | None = None + if key_path: + keyfile = os.path.expanduser(os.path.expandvars(key_path)) + if not Path(keyfile).is_file(): + print(f"Warning: Key file '{keyfile}' not found") + keyfile = None + else: + print(f"Using SSH key: {keyfile}") + + # Try multiple times to connect + max_retries = 5 + retry_delay = 2 + last_error = None + + for attempt in range(max_retries): + try: + self.client.connect( + hostname=host, + username=user, + key_filename=keyfile, + look_for_keys=(keyfile is None), + allow_agent=(keyfile is None), + timeout=30, + ) + return # Successfully connected + except PasswordRequiredException: + try: + self.client.connect( + hostname=host, + username=user, + look_for_keys=True, + allow_agent=True, + timeout=30, + ) + return # Successfully connected + except Exception as e: + last_error = e + except (SSHException, Exception) as e: + last_error = e + if attempt < max_retries - 1: + print(f"Connection attempt {attempt + 1} failed, retrying in 5 seconds...") + time.sleep(retry_delay) + continue + + # If we get here, all retries failed + print(f"SSH connection error to {host}: {last_error}") + raise last_error + + def exec(self, cmd: str, timeout: int | None = None) -> tuple[int, str, str]: + """Execute a command with optional timeout""" + try: + 
stdin, stdout, stderr = self.client.exec_command(cmd, timeout=timeout) + rc = stdout.channel.recv_exit_status() + return rc, stdout.read().decode(), stderr.read().decode() + except Exception as e: + print(f"Error executing command on {self.host}: {e}") + return 1, "", str(e) + + def close(self) -> None: + self.client.close() diff --git a/scripts/geni_lib/rspecs/test.xml b/scripts/geni_lib/rspecs/test.xml new file mode 100644 index 0000000..8ad5c47 --- /dev/null +++ b/scripts/geni_lib/rspecs/test.xml @@ -0,0 +1,28 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/scripts/terraform/.gitignore b/scripts/terraform/.gitignore new file mode 100644 index 0000000..3983ce0 --- /dev/null +++ b/scripts/terraform/.gitignore @@ -0,0 +1,33 @@ +# Local .terraform directories +**/.terraform/* + +# Terraform state files +*.tfstate +*.tfstate.* + +# Crash log files +crash.log + +# Terraform variables files +*.tfvars +*.tfvars.json + +# Override files (used for local development) +override.tf +override.tf.json +*_override.tf +*_override.tf.json + +# Plan output files +*.tfplan + +# Module and provider lock files +.terraform.lock.hcl +terraform.tfstate.lock.hcl + +# Sensitive files +.env + + +# keys +*.pem \ No newline at end of file diff --git a/scripts/terraform/README.md b/scripts/terraform/README.md new file mode 100644 index 0000000..993767b --- /dev/null +++ b/scripts/terraform/README.md @@ -0,0 +1,87 @@ +## Setting up SREGym using Terraform + +This guide outlines the steps for establishing a secure connection to your Azure environment using a VPN and then provisioning resources with Terraform. This will create a two-node Kubernetes cluster with one controller and one worker node. + +**NOTE**: This will incur cloud costs as resources are created on Azure. + +**Prerequisites:** + +- **Azure VPN Connection:** Set up a secure connection to your Azure environment using a VPN client. +- **Working directory:** SREGym/scripts/terraform/ +- **Privileges:** The user should have the privileges to create resources (SSH keys, VM, network interface, network interface security group (if required), public IP, subnet, virtual network) in the selected resource group. +- **Azure CLI:** Follow the official [Microsoft documentation](https://learn.microsoft.com/en-us/cli/azure/install-azure-cli) for installing the Azure CLI for your operating system: +- **Install and initialize Terraform:** + + a. Download and install Terraform from the [official HashiCorp website](https://developer.hashicorp.com/terraform/install); + + b. To make the initial dependency selections that will initialize the dependency lock file, run: + + terraform init + +**Steps:** + +1. **Authenticate with Azure CLI** + + Open a terminal window and run the following command to log in to Azure: + + ```shell + az login + ``` + +2. **Select subscription** + + The output of az login will have a list of subscriptions you have access to. Copy the value in the "id" column of the subscription you want to work with: + + ```shell + az account set --subscription "" + ``` +3. **Verify the plan** + + *Note*: The SSH port of the VMs is open to the public. Please update the NSG resource in the main.tf file to restrict incoming traffic. Use the source_address_prefix attribute to specify allowed sources (e.g., source_address_prefix = "CorpNetPublic"). + + Create and save the plan by passing the required variables + + a) _resource_group_name_ (rg): the resource group where the resources would be created. 
+ + b) _resource_prefix_name_ (prefix): a prefix for all the resources created using the Terraform script. + + ```shell + terraform plan -out main.tfplan -var " resource_group_name=" -var "resource_name_prefix=" + ``` +5. **Apply the saved plan** + + Note: Verify the plan from the previous step before applying it. + + ```shell + terraform apply "main.tfplan" + ``` + +6. **Setup SREGym** + Run the below script to setup SREGym on the newly provisioned resources + + ```shell + python deploy.py + ``` + On successful execution, the script outputs the SSH commands to login to the controller and worker node. Please save it. + + Please activate virtual environment before running any scripts and add the path to `wrk2` executable to PATH: + + ``` + azureuser@kubeController:~/SREGym$ source .venv/bin/activate + (.venv) azureuser@kubeController:~/SREGym/clients$ export PATH="$PATH:/home/azureuser/SREGym/TargetMicroservices/wrk2" + ``` + +**How to destroy the resources using Terraform?** + +1. Before deleting the resources, run the below command to create and save a plan (use the values previous used for resource_group_name and resource_name_prefix) + + ```shell + terraform plan -destroy -out main.destroy.tfplan -var "resource_group_name=" -var "resource_name_prefix=" + ``` + +2. Once the plan is verified, remove the resources using the below command: + + ```shell + terraform destroy main.destroy.tfplan + ``` + diff --git a/scripts/terraform/data.tf b/scripts/terraform/data.tf new file mode 100644 index 0000000..641213c --- /dev/null +++ b/scripts/terraform/data.tf @@ -0,0 +1,3 @@ +data "azurerm_resource_group" "rg" { + name = var.resource_group_name +} \ No newline at end of file diff --git a/scripts/terraform/deploy.py b/scripts/terraform/deploy.py new file mode 100644 index 0000000..5aff408 --- /dev/null +++ b/scripts/terraform/deploy.py @@ -0,0 +1,215 @@ +import logging +import os +import subprocess + +REPO = "/home/azureuser/SREGym" + +# Configure logging +logging.basicConfig(level=logging.INFO) # Change to DEBUG for more detailed logs +logger = logging.getLogger(__name__) + + +def run_command(command, capture_output=False): + """Runs a shell command and handles errors.""" + try: + logger.debug(f"Running command: {' '.join(command)}") + result = subprocess.run(command, capture_output=capture_output, text=True, check=True) + if capture_output: + logger.debug(f"Command output: {result.stdout.strip()}") + return result.stdout.strip() if capture_output else None + except subprocess.CalledProcessError as e: + logger.error(f"Command '{' '.join(command)}' failed with error: {e.stderr.strip() if e.stderr else str(e)}") + if capture_output: + return None + + +def setup_aiopslab(): + try: + run_command(["terraform", "plan", "-out", "main.tfplan"]) + output = run_command(["terraform", "apply", "main.tfplan"], capture_output=True) + if output: + logger.debug(f"Terraform apply output: {output}") + except Exception as e: + logger.error(f"Error in setup_aiopslab: {str(e)}") + + +def destroy_aiopslab(): + pass + + +def get_terraform_output(output_name): + """Retrieve Terraform output.""" + try: + result = run_command(["terraform", "output", "-raw", output_name], capture_output=True) + return result + except Exception as e: + logger.error(f"Failed to get Terraform output for {output_name}: {str(e)}") + return None + + +def save_private_key(key_data, filename): + """Save the private key to a file.""" + try: + with open(filename, "w") as key_file: + key_file.write(key_data) + os.chmod(filename, 0o600) + 
logger.info(f"Private key saved to {filename}") + except Exception as e: + logger.error(f"Failed to save private key to {filename}: {str(e)}") + + +def copy_and_execute_script(username, private_key, public_ip, script): + """Copy and execute the shell script on the remote VM.""" + remote_path = f"{username}@{public_ip}:/home/{username}" + try: + # Copy the shell script to the remote VM + run_command( + [ + "scp", + "-o", + "StrictHostKeyChecking=no", + "-i", + private_key, + script, + remote_path, + ] + ) + + # Execute the shell script on the remote VM + run_command( + [ + "ssh", + "-i", + private_key, + f"{username}@{public_ip}", + f"bash /home/{username}/{os.path.basename(script)}", + ] + ) + except Exception as e: + logger.error(f"Failed to copy or execute script on {public_ip}: {str(e)}") + + +def get_kubeadm_join_remote(username, private_key, public_ip): + """SSH into the remote machine and generate the kubeadm join command.""" + generate_join_command = [ + "ssh", + "-i", + private_key, + f"{username}@{public_ip}", + "sudo kubeadm token create --print-join-command", + ] + try: + print(generate_join_command) + result = run_command(generate_join_command, capture_output=True) + return result + except Exception as e: + logger.error(f"Failed to retrieve kubeadm join command from {public_ip}: {str(e)}") + return None + + +def run_kubeadm_join_on_worker(worker_username, private_key, worker_ip, join_command): + """SSH into the worker and run the kubeadm join command.""" + ssh_command = [ + "ssh", + "-i", + private_key, + f"{worker_username}@{worker_ip}", + f"sudo {join_command} --cri-socket /var/run/cri-dockerd.sock", + ] + try: + run_command(ssh_command) + except Exception as e: + logger.error(f"Failed to run kubeadm join on {worker_ip}: {str(e)}") + + +def add_ssh_key(host, port=22): + """Runs ssh-keyscan on a given host and appends the key to known_hosts.""" + try: + # Build the ssh-keyscan command + keyscan_cmd = ["ssh-keyscan", "-H", "-p", str(port), host] + + # Run the ssh-keyscan command and capture output + result = run_command(keyscan_cmd, capture_output=True) + + # Append the output (host key) to known_hosts + with open(os.path.expanduser("~/.ssh/known_hosts"), "a") as known_hosts_file: + known_hosts_file.write(result) + logger.info(f"SSH key for {host} added to known_hosts.") + except subprocess.CalledProcessError as e: + logger.error(f"Failed to fetch SSH key for {host}: {e}") + except Exception as ex: + logger.error(f"An error occurred while adding SSH key for {host}: {ex}") + + +def deploy_prometheus(username, private_key_file_1, public_ip_1): + """Deploy Prometheus on the worker node.""" + try: + run_command( + [ + "ssh", + "-o", + "StrictHostKeyChecking=no", + "-i", + private_key_file_1, + f"{username}@{public_ip_1}", + f"bash {REPO}/scripts/setup.sh kubeworker1", + ] + ) + except Exception as e: + logger.error(f"Failed to deploy Prometheus: {str(e)}") + + +def main(): + # Retrieve private keys and public IPs for both VMs + private_key_1 = get_terraform_output("key_data_1") + private_key_2 = get_terraform_output("key_data_2") + public_ip_1 = get_terraform_output("public_ip_address_1") + public_ip_2 = get_terraform_output("public_ip_address_2") + username = "azureuser" # TODO: read from variables file + + if not private_key_1 or not private_key_2 or not public_ip_1 or not public_ip_2: + logger.error("Failed to retrieve required Terraform outputs.") + return + + # Save the private keys to files + private_key_file_1 = "vm_1_private_key.pem" + private_key_file_2 = 
"vm_2_private_key.pem" + save_private_key(private_key_1, private_key_file_1) + save_private_key(private_key_2, private_key_file_2) + + # Path to the shell script + kubeadm_shell_script = f"./scripts/kubeadm.sh" + controller_shell_script = f"./scripts/kube_controller.sh" + setup_sregym_script = f"./scripts/setup_sregym.sh" + prom_worker_setup_script = f"./scripts/prom_on_worker.sh" + + # Install kubeadm on all the VMs + copy_and_execute_script(username, private_key_file_1, public_ip_1, kubeadm_shell_script) + copy_and_execute_script(username, private_key_file_2, public_ip_2, kubeadm_shell_script) + + # Setup kube controller + copy_and_execute_script(username, private_key_file_1, public_ip_1, controller_shell_script) + + # Get join command and run on the worker + join_command = get_kubeadm_join_remote(username, private_key_file_1, public_ip_1) + + if join_command: + logger.info(f"Join command retrieved: {join_command}") + run_kubeadm_join_on_worker(username, private_key_file_2, public_ip_2, join_command) + + # Setup sregym + copy_and_execute_script(username, private_key_file_1, public_ip_1, setup_sregym_script) + + # Deploy Prometheus on the worker node) + copy_and_execute_script(username, private_key_file_2, public_ip_2, prom_worker_setup_script) + deploy_prometheus(username, private_key_file_1, public_ip_1) + + # print public ip of controller and worker and give ssh command to access it + logger.info(f"Controller Public IP: {public_ip_1}") + logger.info(f"Worker Public IP: {public_ip_2}") + logger.info(f"SSH command to access controller: ssh -i {private_key_file_1} {username}@{public_ip_1}") + logger.info(f"SSH command to access worker: ssh -i {private_key_file_2} {username}@{public_ip_2}") + + +if __name__ == "__main__": + main() diff --git a/scripts/terraform/main.tf b/scripts/terraform/main.tf new file mode 100644 index 0000000..77db20c --- /dev/null +++ b/scripts/terraform/main.tf @@ -0,0 +1,197 @@ +# Create virtual network +resource "azurerm_virtual_network" "aiopslab_network" { + name = "${var.resource_name_prefix}_aiopslabVnet" + address_space = ["10.0.0.0/16"] + location = var.resource_location + resource_group_name = var.resource_group_name +} + +# Create subnet +resource "azurerm_subnet" "aiopslab_subnet" { + name = "${var.resource_name_prefix}_aiopslabSubnet" + resource_group_name = var.resource_group_name + virtual_network_name = azurerm_virtual_network.aiopslab_network.name + address_prefixes = ["10.0.1.0/24"] +} + +# Create public IPs +resource "azurerm_public_ip" "aiopslab_public_ip_1" { + name = "${var.resource_name_prefix}_aiopslabPublicIP_1" + location = var.resource_location + resource_group_name = var.resource_group_name + allocation_method = "Dynamic" +} + +resource "azurerm_public_ip" "aiopslab_public_ip_2" { + name = "${var.resource_name_prefix}_aiopslabPublicIP_2" + location = var.resource_location + resource_group_name = var.resource_group_name + allocation_method = "Dynamic" +} + +# Create Network Security Group and rule with only CorpNet access +resource "azurerm_network_security_group" "aiopslab_nsg_1" { + name = "${var.resource_name_prefix}_aiopslabNSG_1" + location = var.resource_location + resource_group_name = var.resource_group_name + + security_rule { + name = "SSH" + priority = 1001 + direction = "Inbound" + access = "Allow" + protocol = "Tcp" + source_port_range = "*" + destination_port_range = "22" + source_address_prefix = "*" + destination_address_prefix = "*" + } +} + +resource "azurerm_network_security_group" "aiopslab_nsg_2" { + name = 
"${var.resource_name_prefix}_aiopslabNSG_2" + location = var.resource_location + resource_group_name = var.resource_group_name + + security_rule { + name = "SSH" + priority = 1001 + direction = "Inbound" + access = "Allow" + protocol = "Tcp" + source_port_range = "*" + destination_port_range = "22" + source_address_prefix = "*" + destination_address_prefix = "*" + } +} + +# Create network interfaces +resource "azurerm_network_interface" "aiopslab_nic_1" { + name = "${var.resource_name_prefix}_aiopslabNIC_1" + location = var.resource_location + resource_group_name = var.resource_group_name + + ip_configuration { + name = "${var.resource_name_prefix}_aioplabNICConfiguration_1" + subnet_id = azurerm_subnet.aiopslab_subnet.id + private_ip_address_allocation = "Dynamic" + public_ip_address_id = azurerm_public_ip.aiopslab_public_ip_1.id + } +} + +resource "azurerm_network_interface" "aiopslab_nic_2" { + name = "${var.resource_name_prefix}_aiopslabNIC_2" + location = var.resource_location + resource_group_name = var.resource_group_name + + ip_configuration { + name = "${var.resource_name_prefix}_aioplabNICConfiguration_2" + subnet_id = azurerm_subnet.aiopslab_subnet.id + private_ip_address_allocation = "Dynamic" + public_ip_address_id = azurerm_public_ip.aiopslab_public_ip_2.id + } +} + +# Connect the security groups to the network interfaces +resource "azurerm_network_interface_security_group_association" "aiopslab_nsg_association_1" { + network_interface_id = azurerm_network_interface.aiopslab_nic_1.id + network_security_group_id = azurerm_network_security_group.aiopslab_nsg_1.id +} + +resource "azurerm_network_interface_security_group_association" "aiopslab_nsg_association_2" { + network_interface_id = azurerm_network_interface.aiopslab_nic_2.id + network_security_group_id = azurerm_network_security_group.aiopslab_nsg_2.id +} + +resource "random_id" "random_id" { + byte_length = 8 +} + + +# Create storage accounts for boot diagnostics +resource "azurerm_storage_account" "aiopslab_storage_account_1" { + # storage account names can only consist of lowercase letters and numbers + name = "diag${random_id.random_id.hex}1" + location = var.resource_location + resource_group_name = var.resource_group_name + account_tier = "Standard" + account_replication_type = "LRS" +} + +resource "azurerm_storage_account" "aiopslab_storage_account_2" { + name = "diag${random_id.random_id.hex}2" + location = var.resource_location + resource_group_name = var.resource_group_name + account_tier = "Standard" + account_replication_type = "LRS" +} + + + +# Create virtual machines +resource "azurerm_linux_virtual_machine" "aiopslab_vm_1" { + name = "${var.resource_name_prefix}_aiopslabVM_1" + location = var.resource_location + resource_group_name = var.resource_group_name + network_interface_ids = [azurerm_network_interface.aiopslab_nic_1.id] + size = "Standard_D4s_v3" + + os_disk { + name = "${var.resource_name_prefix}_OsDisk_1" + caching = "ReadWrite" + storage_account_type = "Premium_LRS" + } + + source_image_reference { + publisher = "Canonical" + offer = "0001-com-ubuntu-server-jammy" + sku = "22_04-lts-gen2" + version = "latest" + } + + computer_name = "kubeController" + admin_username = var.username + + admin_ssh_key { + username = var.username + public_key = azapi_resource_action.aiopslab_ssh_public_key_gen_1.output.publicKey + } + + boot_diagnostics { + storage_account_uri = azurerm_storage_account.aiopslab_storage_account_1.primary_blob_endpoint + } +} + +resource "azurerm_linux_virtual_machine" "aiopslab_vm_2" { 
+ name = "${var.resource_name_prefix}_aiopslabVM_2" + location = var.resource_location + resource_group_name = var.resource_group_name + network_interface_ids = [azurerm_network_interface.aiopslab_nic_2.id] + size = "Standard_F16s_v2" + + os_disk { + name = "${var.resource_name_prefix}_OsDisk_2" + caching = "ReadWrite" + storage_account_type = "Premium_LRS" + } + + source_image_reference { + publisher = "Canonical" + offer = "0001-com-ubuntu-server-jammy" + sku = "22_04-lts-gen2" + version = "latest" + } + + computer_name = "kubeWorker1" + admin_username = var.username + + admin_ssh_key { + username = var.username + public_key = azapi_resource_action.aiopslab_ssh_public_key_gen_2.output.publicKey + } + + boot_diagnostics { + storage_account_uri = azurerm_storage_account.aiopslab_storage_account_2.primary_blob_endpoint + } +} diff --git a/scripts/terraform/outputs.tf b/scripts/terraform/outputs.tf new file mode 100644 index 0000000..e612f0a --- /dev/null +++ b/scripts/terraform/outputs.tf @@ -0,0 +1,19 @@ +output "public_ip_address_1" { + value = azurerm_linux_virtual_machine.aiopslab_vm_1.public_ip_address +} + +output "public_ip_address_2" { + value = azurerm_linux_virtual_machine.aiopslab_vm_2.public_ip_address +} + +output "key_data_1" { + value = azapi_resource_action.aiopslab_ssh_public_key_gen_1.output.privateKey +} + +output "key_data_2" { + value = azapi_resource_action.aiopslab_ssh_public_key_gen_2.output.privateKey +} + +output "username" { + value = var.username +} \ No newline at end of file diff --git a/scripts/terraform/providers.tf b/scripts/terraform/providers.tf new file mode 100644 index 0000000..ca5acba --- /dev/null +++ b/scripts/terraform/providers.tf @@ -0,0 +1,23 @@ +terraform { + required_version = ">=0.12" + + required_providers { + azapi = { + source = "azure/azapi" + version = "~>1.5" + } + azurerm = { + source = "hashicorp/azurerm" + version = "~>2.0" + } + random = { + source = "hashicorp/random" + version = "~>3.0" + } + } +} + +provider "azurerm" { + features {} + skip_provider_registration = true +} \ No newline at end of file diff --git a/scripts/terraform/scripts/kube_controller.sh b/scripts/terraform/scripts/kube_controller.sh new file mode 100644 index 0000000..197b303 --- /dev/null +++ b/scripts/terraform/scripts/kube_controller.sh @@ -0,0 +1,25 @@ +#All below steps only for setting up controller + +sudo systemctl enable --now kubelet +sudo kubeadm config images pull --cri-socket /var/run/cri-dockerd.sock +sudo kubeadm init --pod-network-cidr=10.244.0.0/16 --cri-socket /var/run/cri-dockerd.sock + +#copy kubeconfig to user +mkdir -p $HOME/.kube +sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config +sudo chown $(id -u):$(id -g) $HOME/.kube/config + + +# install network +wget https://github.com/flannel-io/flannel/releases/latest/download/kube-flannel.yml +sleep 3 +kubectl apply -f kube-flannel.yml +sudo systemctl status kubelet --no-pager + + +# commands to troubleshoot +ip addr | grep cni +kubectl get pods -n kube-system + + + diff --git a/scripts/terraform/scripts/kubeadm.sh b/scripts/terraform/scripts/kubeadm.sh new file mode 100644 index 0000000..aea5154 --- /dev/null +++ b/scripts/terraform/scripts/kubeadm.sh @@ -0,0 +1,136 @@ +#https://www.nathanobert.com/posts/blog-kubernetes-on-ubuntu/ +#https://earthly.dev/blog/deploy-kubernetes-cri-o-container-runtime/ +#https://kubernetes.io/docs/setup/production-environment/tools/kubeadm/install-kubeadm/ + + +# refresh the package list (info about available packages and their version) from the repositories 
+sudo apt update -y + +# upgrade installed packages to their latest version +sudo apt upgrade -y + + +# removes any automatically installed packages that are not longer needed + + + +# install docker - https://docs.docker.com/engine/install/ubuntu/ (follow the docker page) + +# remove unofficial versions of docker if any +for pkg in docker.io docker-doc docker-compose docker-compose-v2 podman-docker containerd runc; do sudo apt-get remove $pkg; done + + +#Set up Docker's apt repository. + +sudo apt update +sudo apt install ca-certificates curl +# Creates a directory (/etc/apt/keyrings) to store the GPG key. (install doesn't install anything - https://linuxhandbook.com/install-command/) +sudo install -m 0755 -d /etc/apt/keyrings +# download Docker's official GPG keys +sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc +# set read permissions for all users on the key file. +sudo chmod a+r /etc/apt/keyrings/docker.asc + +# Add the repository to Apt sources: +# deb is the format, as well as filename extension of the software package format for the Debian Linux distribution and its derivatives. +# this line tells APT to use the specified Docker repository for Ubuntu 22.04 (Jammy) with the appropriate architecture and GPG key verification +echo \ + "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \ + $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \ + sudo tee /etc/apt/sources.list.d/docker.list > /dev/null + +# refresh the package list with the newly added repository +sudo apt update + +# install the packages + +sudo apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin + +# start service if docker is not running + +sudo systemctl status docker.service --no-pager; +sudo service docker start + +# test if docker is properly installed +# https://docs.docker.com/engine/install/linux-postinstall/, https://askubuntu.com/questions/477551/how-can-i-use-docker-without-sudo +# The Docker daemon (the core component of Docker) communicates with the Docker CLI (command-line interface) via a Unix socket (not a TCP port). +# By default, this Unix socket is owned by the root user. +# Without proper permissions, non-root users cannot access this socket. Docker daemon while creating the socket, creates a group called docker +# with #read/write privileges. So we can skip using sudo before every command by adding the user to docker group +sudo usermod -aG docker $USER + +# for the above command to take effect, we need to start a new session. If we don't want to do that, we can use newgrp to start a new session with +# with the specified group as the group ID. Only needed for debugging. 
+# newgrp docker && docker run hello-world + + +# Install cri-dockerd - the driver between Kubernetes CRI interface and the docker APIs +# pick the latest stable release from here - +# wget https://github.com/Mirantis/cri-dockerd/releases/download//cri-dockerd_<>.deb +wget https://github.com/Mirantis/cri-dockerd/releases/download/v0.3.12/cri-dockerd_0.3.12.3-0.ubuntu-jammy_amd64.deb +sudo apt install ./cri-dockerd_0.3.12.3-0.ubuntu-jammy_amd64.deb -y + + +sudo systemctl status docker.service --no-pager; +sudo systemctl status cri-docker.service --no-pager; + + + + +# kubeadm installation - https://kubernetes.io/docs/setup/production-environment/tools/kubeadm/install-kubeadm/ +# apt-transport-https may be a dummy package; if so, you can skip that package +sudo apt-get install -y apt-transport-https ca-certificates curl gpg + +# add kube repository +echo "deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v1.29/deb/ /" | sudo tee /etc/apt/sources.list.d/kubernetes.list + + +#Download the public signing key for the Kubernetes package repositories. The same signing key is used for all repositories, so you can disregard #the version in the URL: + +curl -fsSL https://pkgs.k8s.io/core:/stable:/v1.29/deb/Release.key | sudo gpg --dearmor -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg + + +# update the packages +sudo apt update; + +# install packages +# Don't pass "-y" flag as packages could already be installed and held at a particular version +# sudo apt-get install -qy kubeadm kubelet kubectl --allow-change-held-packages +sudo apt-get install -qy kubeadm kubelet kubectl + + +# avoids the package being automatically updated. Good for stability +sudo apt-mark hold kubeadm kubelet kubectl; +kubeadm version; + + +# disable swap +sudo swapoff -a +sudo sed -i '/ swap / s/^\(.*\)$/#\1/g' /etc/fstab + +# check if swap is disabled +free -m + + +#Enable the necessary kernel modules: overlay & br_netfilter + +sudo modprobe overlay +sudo modprobe br_netfilter + +#create script to load modules on every reboot +#heredoc example - https://stackoverflow.com/questions/2500436/how-does-cat-eof-work-in-bash + +cat < Optional[AgentProcess]: + if not reg or not reg.kickoff_command: + return None + existing = self._procs.get(reg.name) + + if existing: + existing.proc.poll() + if existing.proc.returncode is None: + return existing + + env = os.environ.copy() + if reg.kickoff_env: + env.update(reg.kickoff_env) + + proc = subprocess.Popen( + reg.kickoff_command, + shell=True, + cwd=reg.kickoff_workdir or os.getcwd(), + env=env, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + bufsize=1, + universal_newlines=True, + ) + ap = AgentProcess(reg.name, proc) + self._procs[reg.name] = ap + t = threading.Thread(target=self._pipe_logs, args=(reg.name, proc), daemon=True) + t.start() + return ap + + def _pipe_logs(self, name: str, proc: subprocess.Popen): + if proc.stdout is None: + return + for line in proc.stdout: + try: + sys.stdout.write(f"{line}") + sys.stdout.flush() + except Exception: + break diff --git a/sregym/agent_registry.py b/sregym/agent_registry.py new file mode 100644 index 0000000..c59f983 --- /dev/null +++ b/sregym/agent_registry.py @@ -0,0 +1,41 @@ +from dataclasses import dataclass, asdict +from pathlib import Path +from typing import Dict, Optional +import os, yaml + +DEFAULT_REG_PATH = Path(os.environ.get("SREGYM_AGENT_REGISTRY", "agents.yaml")) + +@dataclass +class AgentRegistration: + name: str + kickoff_command: str | None = None + 
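+    # Optional launch settings: the kickoff process is started with cwd=kickoff_workdir
+    # (falling back to the current directory) and with kickoff_env merged into os.environ.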
kickoff_workdir: str | None = None + kickoff_env: Dict[str, str] | None = None + +def _ensure_file(path: Path): + if not path.exists(): + path.write_text(yaml.safe_dump({"agents": []}, sort_keys=False)) + +def list_agents(path: Path = DEFAULT_REG_PATH) -> Dict[str, AgentRegistration]: + _ensure_file(path) + data = yaml.safe_load(path.read_text()) or {"agents": []} + out: Dict[str, AgentRegistration] = {} + for a in data.get("agents", []): + out[a["name"]] = AgentRegistration( + name=a["name"], + kickoff_command=a.get("kickoff_command"), + kickoff_workdir=a.get("kickoff_workdir"), + kickoff_env=a.get("kickoff_env") or {}, + ) + return out + +def get_agent(name: str, path: Path = DEFAULT_REG_PATH) -> Optional[AgentRegistration]: + return list_agents(path).get(name) + +def save_agent(reg: AgentRegistration, path: Path = DEFAULT_REG_PATH) -> None: + _ensure_file(path) + data = yaml.safe_load(path.read_text()) or {"agents": []} + agents = [x for x in data.get("agents", []) if x.get("name") != reg.name] + agents.append(asdict(reg)) + data["agents"] = agents + path.write_text(yaml.safe_dump(data, sort_keys=False)) diff --git a/sregym/conductor/__init__.py b/sregym/conductor/__init__.py new file mode 100644 index 0000000..affab6d --- /dev/null +++ b/sregym/conductor/__init__.py @@ -0,0 +1 @@ +from .conductor import Conductor diff --git a/sregym/conductor/conductor.py b/sregym/conductor/conductor.py new file mode 100644 index 0000000..4610041 --- /dev/null +++ b/sregym/conductor/conductor.py @@ -0,0 +1,562 @@ +import logging +import shutil +import time +from pathlib import Path + +import yaml + +from dashboard.proxy import LogProxy +from sregym.conductor.constants import StartProblemResult +from sregym.conductor.oracles.detection import DetectionOracle +from sregym.conductor.oracles.diagnosis_oracle import DiagnosisOracle +from sregym.conductor.problems.registry import ProblemRegistry +from sregym.conductor.utils import is_ordered_subset +from sregym.generators.fault.inject_remote_os import RemoteOSFaultInjector +from sregym.generators.fault.inject_virtual import VirtualizationFaultInjector +from sregym.generators.noise.transient_issues.transient_issues import FaultType, PodScope, TransientIssuesGenerator +from sregym.service.apps.app_registry import AppRegistry +from sregym.service.dm_dust_manager import DmDustManager +from sregym.service.dm_flakey_manager import DmFlakeyManager +from sregym.service.khaos import KhaosController +from sregym.service.kubectl import KubeCtl +from sregym.service.telemetry.prometheus import Prometheus + + +class Conductor: + def __init__(self): + # core services + self.problems = ProblemRegistry() + self.kubectl = KubeCtl() + self.prometheus = Prometheus() + self.apps = AppRegistry() + self.agent_name = None + + self.khaos = KhaosController(self.kubectl) + self.dm_dust_manager = DmDustManager(self.kubectl) + self.dm_flakey_manager = DmFlakeyManager(self.kubectl) + + self.problem = None + self.detection_oracle = None + self.problem_id = None + self.problem = None + self.app = None + self.detection_oracle = None + self.execution_start_time = None + + # grading flow state + # submission_stage reflects the current AgentAct name (e.g., "diagnosis", "mitigation") or "done" + self.submission_stage = None + self.results = {} + + self.tasklist = None + self.logger = logging.getLogger("sregym-global") # this is for dashboard + self.local_logger = logging.getLogger("all.sregym.conductor") + + self.transient_config = { + "switch": False, + "min_duration": 40, + "max_duration": 60, 
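+            # Fault types and pod scopes consumed by TransientIssuesGenerator; transient
+            # injection is disabled by default ("switch": False) — see the FIXME in
+            # _gymact_inject_fault referencing issue #296.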
+ "fault_types": [FaultType.FAIL_SLOW, FaultType.FAIL_STOP], + "scopes": [PodScope.TARGET_NAMESPACE], + "interval_min": 20, + "interval_max": 30, + } + + self.act_sequence: list[dict] = [] + self.current_act_index: int = 0 + self.current_agent_act_index: int | None = None + self.waiting_for_agent: bool = False + + def register_agent(self, name="agent"): + self.agent_name = name + + def dependency_check(self, binaries: list[str]): + for b in binaries: + if shutil.which(b) is None: + self.local_logger.error(f"Required dependency '{b}' not found.") + raise RuntimeError(f"[❌] Required dependency '{b}' not found.") + + def get_tasklist(self): + file_dir = Path(__file__).resolve().parent + tasklist_path = file_dir / "tasklist.yml" + + # If tasklist file doesn't exist, default to running diagnosis + mitigation + if not tasklist_path.exists(): + self.local_logger.info( + "No tasklist.yml found. Defaulting to running diagnosis and mitigation for this problem." + ) + self.tasklist = ["diagnosis", "mitigation"] + return + + with open(tasklist_path, "r") as f: + tasklist = yaml.safe_load(f) + if not tasklist: + msg = "Badly formatted tasklist.yml" + self.local_logger.error(msg) + raise RuntimeError(msg) + problems = tasklist["all"]["problems"] + + if self.problem_id not in (problems if problems else []): + self.local_logger.warning( + "problem_id not found in tasklist. Defaulting to running diagnosis and mitigation." + ) + self.tasklist = ["diagnosis", "mitigation"] + else: + problem_tasklist = problems[self.problem_id] + if not problem_tasklist: + msg = f"No tasks specified for {self.problem_id}" + self.local_logger.error(msg) + raise RuntimeError(msg) + + if not is_ordered_subset(problem_tasklist, ["diagnosis", "mitigation"]): + msg = f"Task list for {self.problem_id} is either out of order or has an unknown step (allowed: diagnosis, mitigation)" + self.local_logger.error(msg) + raise RuntimeError(msg) + + self.local_logger.info( + f"Tasklist specified for {self.problem_id}. Configured AgentActs to run: {problem_tasklist}" + ) + + # Use the tasklist as-is (only AgentAct names, e.g., diagnosis, mitigation) + self.tasklist = problem_tasklist + + def _build_act_sequence(self): + self.act_sequence = [] + self.current_act_index = 0 + self.current_agent_act_index = None + self.waiting_for_agent = False + + if not self.tasklist: + self.local_logger.warning("Empty tasklist; no AgentActs configured for this problem.") + return + + # Map AgentAct names to their precondition/evaluation functions + agent_act_definitions = { + "diagnosis": { + "precondition": self._precondition_diagnosis, + "evaluation": self._evaluate_diagnosis, + }, + "mitigation": { + "precondition": self._precondition_mitigation, + "evaluation": self._evaluate_mitigation, + }, + } + + # Determine which AgentActs are actually available (oracle attached) + configured_agent_acts: list[dict] = [] + for name in self.tasklist: + if name not in agent_act_definitions: + self.local_logger.warning(f"Unknown AgentAct '{name}' in tasklist; skipping.") + continue + + if name == "diagnosis": + if getattr(self.problem, "diagnosis_oracle", None): + configured_agent_acts.append( + { + "type": "AgentAct", + "name": name, + "precondition": agent_act_definitions[name]["precondition"], + "evaluation": agent_act_definitions[name]["evaluation"], + } + ) + else: + self.local_logger.info("⏩ Diagnosis oracle is not attached. 
Skipping diagnosis.") + + elif name == "mitigation": + if getattr(self.problem, "mitigation_oracle", None): + configured_agent_acts.append( + { + "type": "AgentAct", + "name": name, + "precondition": agent_act_definitions[name]["precondition"], + "evaluation": agent_act_definitions[name]["evaluation"], + } + ) + else: + self.local_logger.info("⏩ Mitigation oracle is not attached. Skipping mitigation.") + + if not configured_agent_acts: + self.local_logger.warning( + "No AgentActs left after checking oracles. This problem will complete without agent interaction." + ) + return + + # Default GymAct: inject fault before the first AgentAct precondition + self.act_sequence.append( + { + "type": "GymAct", + "name": "inject_fault", + "op": self._gymact_inject_fault, + } + ) + + # Append AgentActs in order + self.act_sequence.extend(configured_agent_acts) + + def _gymact_inject_fault(self): + self.problem.inject_fault() + self.logger.info("[ENV] Injected fault") + + # Prepare diagnosis checkpoint if available, after fault injection but before agent acts + if ( + hasattr(self.problem, "diagnosis_oracle") + and self.problem.diagnosis_oracle + and isinstance(self.problem.diagnosis_oracle, DiagnosisOracle) + ): + self.problem.diagnosis_oracle.load_localization_checkpoint() + self.local_logger.info("Diagnosis checkpoint loaded after fault injection.") + + # FIXME: Disabled until https://github.com/xlab-uiuc/SREGym/issues/296 is complete + # self.configure_transient_issues() + # if self.transient_config["switch"]: + # self._start_transient_issues() + + # -------- AgentAct: diagnosis -------- + def _precondition_diagnosis(self): + self.local_logger.info("Precondition for Diagnosis AgentAct executed. No real action.") + + def _evaluate_diagnosis(self, solution): + """Evaluation logic for diagnosis AgentAct.""" + self.local_logger.info("Start Eval for Diagnosis", extra={"sol": solution}) + r = self.problem.diagnosis_oracle.evaluate(solution) + self.results["Diagnosis"] = r + self.results["TTL"] = time.time() - self.execution_start_time + self.logger.info( + f"[EVAL] Diagnosis " + f"{'Succeed' if self.results['Diagnosis']['success'] else 'Failed'}\n " + f"TTL: {self.results['TTL']}" + ) + return r + + # -------- AgentAct: mitigation -------- + def _precondition_mitigation(self): + self.local_logger.info("Precondition for Mitigation AgentAct executed. No real action.") + + def _evaluate_mitigation(self, solution): + """Evaluation logic for mitigation AgentAct.""" + # Currently mitigation_oracle.evaluate() does not take the agent solution directly. + self.local_logger.info("Start Eval for Mitigation", extra={"sol": solution}) + r = self.problem.mitigation_oracle.evaluate() + self.results["Mitigation"] = r + self.results["TTM"] = time.time() - self.execution_start_time + self.logger.info( + f"[EVAL] Mitigation " + f"{'Succeed' if self.results['Mitigation']['success'] else 'Failed'}\n " + f"TTM: {self.results['TTM']}" + ) + return r + + def _advance_to_next_agent_act_precondition(self, start_index: int = 0): + """ + Execute Acts sequentially starting from start_index until: + - The precondition of the next AgentAct is executed (inclusive), then wait for agent submission; or + - There are no more AgentActs, in which case finish the problem. 
+ """ + self.current_agent_act_index = None + self.waiting_for_agent = False + self.current_act_index = start_index + + if not self.act_sequence: + self.local_logger.info("No Acts configured; finishing problem immediately.") + self._finish_problem() + return + + i = start_index + while i < len(self.act_sequence): + act = self.act_sequence[i] + act_type = act.get("type") + act_name = act.get("name") + + if act_type == "GymAct": + self.local_logger.debug(f"Executing GymAct '{act_name}'") + act["op"]() + i += 1 + continue + + if act_type == "AgentAct": + self.local_logger.debug(f"Executing precondition for AgentAct '{act_name}' and waiting for agent.") + act["precondition"]() + self.current_agent_act_index = i + self.waiting_for_agent = True + self.submission_stage = act_name + self.current_act_index = i + self.logger.info(f"[STAGE] Go to stage {self.submission_stage}") + return + + self.local_logger.warning(f"Unknown Act type '{act_type}' for Act '{act_name}'; skipping.") + i += 1 + + # No more AgentActs; finish the problem + self._finish_problem() + + def _finish_problem(self): + self.submission_stage = "done" + + self.logger.info(f"[STAGE] Done, recover fault") + + if self.transient_config["switch"] and hasattr(self, "transient_issue_generator"): + self.transient_issue_generator.stop_continuous_injection() + + if self.problem: + self.problem.recover_fault() + + self.logger.info(f"[STAGE] Undeploy app") + self.undeploy_app() + + async def start_problem(self) -> StartProblemResult: + """ + 1) Provision infra & workload + 2) Initialize Act registry and execute initial GymActs and first AgentAct precondition + + Returns: + StartProblemResult: Result status indicating success or skip reason + """ + self.execution_start_time = time.time() + self.problem = self.problems.get_problem_instance(self.problem_id) + self.app = self.problem.app + self.detection_oracle = DetectionOracle(self.problem) + self.results = {} + + self.dependency_check(["kubectl", "helm"]) + self.local_logger.debug(f"Dependency check passed: kubectl, helm") + + self.local_logger.info(f"[Session Start] Problem ID: {self.problem_id}") + self.logger.info(f"[STAGE] Start testing on problem: {self.problem_id}") + + if self.problem.requires_khaos() and self.kubectl.is_emulated_cluster(): + self.local_logger.warning( + f"Problem '{self.problem_id}' requires Khaos for eBPF-based fault injection, " + "but Khaos cannot be deployed on emulated clusters (kind, minikube, k3d, etc.). " + "Skipping this problem." + ) + return StartProblemResult.SKIPPED_KHAOS_REQUIRED + + self.fix_kubernetes() + + self.get_tasklist() + self._build_act_sequence() + + self.local_logger.info("Undeploying app leftovers...") + self.undeploy_app() # Cleanup any leftovers + self.local_logger.info("App leftovers undeployed.") + self.local_logger.info("Deploying app...") + self.deploy_app() + self.local_logger.info("App deployed.") + # After deployment, execute Acts until the first AgentAct precondition is reached. + self._advance_to_next_agent_act_precondition(start_index=0) + + if self.submission_stage and self.submission_stage != "done": + self.local_logger.info( + f"✅ Deployment complete. Ready for submission. Current stage is: {self.submission_stage}" + ) + else: + self.local_logger.info( + "✅ Deployment complete. No AgentAct configured; problem will complete without agent submission." + ) + return StartProblemResult.SUCCESS + + async def submit(self, wrapped_cmd: str) -> dict: + """ + Called by CLI or HTTP /submit. 
Parses & grades the `submit(...)` call, + advances submission_stage, records results—and when we hit “done”, + triggers undeploy_app. Returns a snapshot of the results dict. + """ + from sregym.conductor.parser import ResponseParser + + parser = ResponseParser() + parsed = parser.parse(wrapped_cmd) + if parsed["api_name"] != "submit": + raise ValueError("Only `submit(...)` is supported.") + sol = parsed["args"][0] if parsed["args"] else None + + # If all tasks are already completed, simply return the final snapshot. + if self.submission_stage == "done": + self.local_logger.info("All tasks already completed; ignoring new submission.") + return dict(self.results) + + if not self.act_sequence: + self.local_logger.warning("submit() called but no Acts are configured; returning current results.") + return dict(self.results) + + if self.current_agent_act_index is None or not self.waiting_for_agent: + self.local_logger.error( + "submit() called when conductor is not waiting for an AgentAct evaluation. " + f"Current submission_stage={self.submission_stage}" + ) + raise RuntimeError("Conductor is not currently waiting for an agent submission.") + + current_act = self.act_sequence[self.current_agent_act_index] + if current_act.get("type") != "AgentAct": + self.local_logger.error( + f"Internal error: current_act at index {self.current_agent_act_index} is not an AgentAct." + ) + raise RuntimeError("Invalid Act configuration.") + + act_name = current_act.get("name") + self.local_logger.info(f"Evaluating AgentAct '{act_name}'", extra={"sol": sol}) + # Run the evaluation function for the current AgentAct + current_act["evaluation"](sol) + + # After evaluation, advance to the next AgentAct precondition (if any) + next_index = self.current_agent_act_index + 1 + self._advance_to_next_agent_act_precondition(start_index=next_index) + + return dict(self.results) + + def fix_kubernetes(self): + self.local_logger.info("Fixing Kubernetes... 
to normal state.") + self.local_logger.info("[FIX] Imbalance leftover if any") + + injector = VirtualizationFaultInjector(namespace="kube-system") + injector.recover_daemon_set_image_replacement( + daemon_set_name="kube-proxy", original_image="registry.k8s.io/kube-proxy:v1.31.13" + ) + + self.local_logger.info("[FIX] KubeletCrash leftover if any") + injector = RemoteOSFaultInjector() + injector.recover_kubelet_crash() + self.local_logger.info("Fix Kubernetes completed.") + + def deploy_app(self): + """Kubectl + Prometheus + problem.app deployment.""" + self.submission_stage = "setup" + self.local_logger.info("[DEPLOY] Setting up metrics-server…") + self.kubectl.exec_command( + "kubectl apply -f https://github.com/kubernetes-sigs/metrics-server/" + "releases/latest/download/components.yaml" + ) + self.kubectl.exec_command( + "kubectl -n kube-system patch deployment metrics-server " + "--type=json -p='[" + '{"op":"add","path":"/spec/template/spec/containers/0/args/-","value":"--kubelet-insecure-tls"},' + '{"op":"add","path":"/spec/template/spec/containers/0/args/-","value":"--kubelet-preferred-address-types=InternalIP"}' + "]'" + ) + self.kubectl.wait_for_ready("kube-system") + + # Only deploy Khaos if the problem requires it + if self.problem and self.problem.requires_khaos(): + self.local_logger.info("[DEPLOY] Deploying Khaos DaemonSet...") + self.khaos.ensure_deployed() + + self.local_logger.info("[DEPLOY] Setting up OpenEBS…") + self.kubectl.exec_command("kubectl apply -f https://openebs.github.io/charts/openebs-operator.yaml") + self.kubectl.exec_command( + "kubectl patch storageclass openebs-hostpath " + '-p \'{"metadata":{"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}\'' + ) + self.kubectl.wait_for_ready("openebs") + + print("Setting up OpenEBS LocalPV-Device…") + device_sc_yaml = """ + apiVersion: storage.k8s.io/v1 + kind: StorageClass + metadata: + name: openebs-device + annotations: + openebs.io/cas-type: local + provisioner: openebs.io/local + parameters: + localpvType: "device" + volumeBindingMode: WaitForFirstConsumer + """ + self.kubectl.exec_command("kubectl apply -f - <" }` → grades the current stage +- **GET /status**: returns `{ "stage": "setup" | "noop" | "detection" | "diagnosis" | "mitigation" | "done" }` +""" + ) + ) + + config = Config(app=app, host=host, port=port, log_level="info") + config.install_signal_handlers = False + server = Server(config) + _server = server # expose to request_shutdown() + + # watcher thread: when _shutdown_event is set, flip server.should_exit + def _watch(): + _shutdown_event.wait() + local_logger.debug("API server shutdown event received") + server.should_exit = True + + threading.Thread(target=_watch, name="api-shutdown-watcher", daemon=True).start() + + try: + local_logger.debug("API server is running") + server.run() # blocks until should_exit becomes True + finally: + # cleanup for potential reuse + _shutdown_event.clear() + _server = None diff --git a/sregym/conductor/constants.py b/sregym/conductor/constants.py new file mode 100644 index 0000000..2ba326c --- /dev/null +++ b/sregym/conductor/constants.py @@ -0,0 +1,8 @@ +from enum import StrEnum + + +class StartProblemResult(StrEnum): + + SUCCESS = "success" + SKIPPED_KHAOS_REQUIRED = "skipped_khaos_required" + diff --git a/sregym/conductor/oracles/assign_non_existent_node_mitigation.py b/sregym/conductor/oracles/assign_non_existent_node_mitigation.py new file mode 100644 index 0000000..1fab765 --- /dev/null +++ 
b/sregym/conductor/oracles/assign_non_existent_node_mitigation.py @@ -0,0 +1,53 @@ +from sregym.conductor.oracles.base import Oracle + + +class AssignNonExistentNodeMitigationOracle(Oracle): + importance = 1.0 + + def evaluate(self) -> dict: + print("== Mitigation Evaluation ==") + + kubectl = self.problem.kubectl + namespace = self.problem.namespace + faulty_service = self.problem.faulty_service + results = {} + + # Check if all services (not only faulty service) is back to normal (Running) + pod_list = kubectl.list_pods(namespace) + all_normal = True + + # Check if the faulty service exists + faulty_service_exists = any(pod.metadata.name.startswith(faulty_service) for pod in pod_list.items) + if not faulty_service_exists: + print(f"Pod named {faulty_service} does not exist.") + all_normal = False + else: + for pod in pod_list.items: + if pod.status.phase != "Running": + print(f"❌ Pod {pod.metadata.name} is in phase: {pod.status.phase}") + all_normal = False + break + + for container_status in pod.status.container_statuses: + if container_status.state.waiting and container_status.state.waiting.reason: + print( + f"❌ Container {container_status.name} is waiting: {container_status.state.waiting.reason}" + ) + all_normal = False + elif container_status.state.terminated and container_status.state.terminated.reason != "Completed": + print( + f"❌ Container {container_status.name} terminated: {container_status.state.terminated.reason}" + ) + all_normal = False + elif not container_status.ready: + print(f"⚠️ Container {container_status.name} is not ready") + all_normal = False + + if not all_normal: + break + + results["success"] = all_normal + + print(f"Mitigation Result: {'Pass ✅' if all_normal else 'Fail ❌'}") + + return results diff --git a/sregym/conductor/oracles/base.py b/sregym/conductor/oracles/base.py new file mode 100644 index 0000000..465949d --- /dev/null +++ b/sregym/conductor/oracles/base.py @@ -0,0 +1,13 @@ +"""Base class for evaluation oracles.""" + +from abc import ABC, abstractmethod + + +class Oracle(ABC): + def __init__(self, problem): + self.problem = problem + + @abstractmethod + def evaluate(self, solution, trace, duration) -> dict: + """Evaluate a solution.""" + pass diff --git a/sregym/conductor/oracles/compound.py b/sregym/conductor/oracles/compound.py new file mode 100644 index 0000000..421ea0f --- /dev/null +++ b/sregym/conductor/oracles/compound.py @@ -0,0 +1,60 @@ +from sregym.conductor.oracles.base import Oracle + + +class CompoundedOracle(Oracle): + importance = 1.0 + + def __init__(self, problem, *args, **kwargs): + super().__init__(problem) + self.oracles = dict() + for i, oracle in enumerate(args): + if not isinstance(oracle, Oracle): + raise TypeError(f"Argument {i} is not an instance of Oracle: {oracle}") + self.oracles[str(i) + "-" + oracle.__class__.__name__] = oracle + for key, oracle in kwargs.items(): + if not isinstance(oracle, Oracle): + raise TypeError(f"Keyword argument '{key}' is not an instance of Oracle: {oracle}") + if key in self.oracles: + raise ValueError(f"Duplicate oracle key: {key}") + self.oracles[key] = oracle + + def evaluate(self, *args, **kwargs): + result = { + "success": True, + "oracles": [], + "accuracy": 0.0, + } + + total_weight = sum(getattr(oracle, "importance", 1.0) for oracle in self.oracles.values()) + + for key, oracle in self.oracles.items(): + try: + res = oracle.evaluate(*args, **kwargs) + res["name"] = key + result["oracles"].append(res) + + if not res.get("success", False): + result["success"] = False + + 
accuracy_weight = getattr(oracle, "importance", 1.0) / total_weight + if "accuracy" in res: + result["accuracy"] += res["accuracy"] * accuracy_weight + else: + accuracy = 100.0 if res.get("success", False) else 0.0 + result["accuracy"] += accuracy * accuracy_weight + + except Exception as e: + print(f"[❌] Error during evaluation of oracle '{key}': {e}") + result["success"] = False + result["oracles"].append( + { + "name": key, + "success": False, + } + ) + + if result["accuracy"] > 100.0 - 1e-3: + result["accuracy"] = 100.0 + elif result["accuracy"] < 0.0 + 1e-3: + result["accuracy"] = 0.0 + return result diff --git a/sregym/conductor/oracles/detection.py b/sregym/conductor/oracles/detection.py new file mode 100644 index 0000000..fa9bd9e --- /dev/null +++ b/sregym/conductor/oracles/detection.py @@ -0,0 +1,30 @@ +from sregym.conductor.oracles.base import Oracle +from sregym.conductor.oracles.utils import is_exact_match + + +import logging + +local_logger = logging.getLogger("all.sregym.oracle") +local_logger.propagate = True +local_logger.setLevel(logging.DEBUG) +class DetectionOracle(Oracle): + def __init__(self, problem): + super().__init__(problem) + + def evaluate(self, solution) -> dict: + expected = "Yes" if self.problem.fault_injected else "No" + local_logger.info(f"== Detection Evaluation (expected: {expected}) ==") + + results = {} + if isinstance(solution, str): + is_correct = is_exact_match(solution.strip().lower(), expected.lower()) + results["accuracy"] = 100.0 if is_correct else 0.0 + results["success"] = is_correct + local_logger.info(f"{'✅' if is_correct else '❌'} Detection: {solution}") + else: + results["accuracy"] = 0.0 + results["success"] = False + results["reason"] = "Invalid Format" + local_logger.warning("❌ Invalid detection format") + + return results diff --git a/sregym/conductor/oracles/diagnosis_oracle.py b/sregym/conductor/oracles/diagnosis_oracle.py new file mode 100644 index 0000000..dc79a78 --- /dev/null +++ b/sregym/conductor/oracles/diagnosis_oracle.py @@ -0,0 +1,547 @@ +import logging +from logging import getLogger +from typing import Any + +from kubernetes import client, config +from kubernetes.config.config_exception import ConfigException + +from sregym.conductor.oracles.base import Oracle + +local_logger = getLogger("all.sregym.localization_oracle") +local_logger.propagate = True +local_logger.setLevel(logging.DEBUG) + + +class DiagnosisOracle(Oracle): + """Logic of Localization Oracle""" + + # BEFORE the agent are ask to act, expect function will be called and checkpoint will be saved + # AFTER the agent finish its run, the expected function will be called AGAIN to compare with agents answer. + + def __init__(self, problem, namespace: str): + super().__init__(problem) + # self.expected = expected + self.checkpoint = None + self.namespace = namespace + + def load_localization_checkpoint(self): + # load the checkpoint for future comparison + self.checkpoint = self.expect() + + def compare_truth(self, expectation, reality): + if type(expectation) == str and type(reality) == str: + return expectation == reality # both string, just compare the string + elif type(expectation) == list and type(reality) == list: + if len(expectation) != len(set(reality)): + return False # TODO: support fp and fn + return all(e in set(reality) for e in expectation) + else: + local_logger.warning( + f"Expectation and reality are not both string or list, can not compare. 
Expectation: {expectation}, Reality: {reality}" + ) + return False + + def expect(self): + raise NotImplementedError("This function should be implemented by the subclass.") + + def verify_stability(self, new_expectation): + consistent = self.compare_truth(self.checkpoint, new_expectation) + + if not consistent: + # just warn, do not panic + local_logger.warning( + f"Checkpoints are not consistent, old: {self.checkpoint}, new: {new_expectation}. Possibly the environment is unstable." + ) + + return consistent + + def get_resource_uid(self, resource_type: str, resource_name: str, namespace: str) -> str | None: + """Return the UID of a live resource using the Kubernetes API.""" + try: + try: + config.load_incluster_config() + except ConfigException: + config.load_kube_config() + if resource_type.lower() == "pod": + api = client.CoreV1Api() + obj = api.read_namespaced_pod(resource_name, namespace) + elif resource_type.lower() == "service": + api = client.CoreV1Api() + obj = api.read_namespaced_service(resource_name, namespace) + elif resource_type.lower() == "deployment": + api = client.AppsV1Api() + obj = api.read_namespaced_deployment(resource_name, namespace) + elif resource_type.lower() == "statefulset": + api = client.AppsV1Api() + obj = api.read_namespaced_stateful_set(resource_name, namespace) + elif resource_type.lower() == "persistentvolumeclaim": + api = client.CoreV1Api() + obj = api.read_namespaced_persistent_volume_claim(resource_name, namespace) + elif resource_type.lower() == "persistentvolume": + api = client.CoreV1Api() + obj = api.read_persistent_volume(resource_name) + elif resource_type.lower() == "configmap": + api = client.CoreV1Api() + obj = api.read_namespaced_config_map(resource_name, namespace) + elif resource_type.lower() == "replicaset": + api = client.AppsV1Api() + obj = api.read_namespaced_replica_set(resource_name, namespace) + elif resource_type.lower() == "memoryquota": + api = client.CoreV1Api() + obj = api.read_namespaced_resource_quota(resource_name, namespace) + elif resource_type.lower() == "ingress": + api = client.NetworkingV1Api() + obj = api.read_namespaced_ingress(resource_name, namespace) + elif resource_type.lower() == "job": + api = client.BatchV1Api() + obj = api.read_namespaced_job(resource_name, namespace) + elif resource_type.lower() == "daemonset": + api = client.AppsV1Api() + obj = api.read_namespaced_daemon_set(resource_name, namespace) + elif resource_type.lower() == "clusterrole": + api = client.RbacAuthorizationV1Api() + obj = api.read_cluster_role(resource_name) + elif resource_type.lower() == "clusterrolebinding": + api = client.RbacAuthorizationV1Api() + obj = api.read_cluster_role_binding(resource_name) + else: + raise ValueError(f"Unsupported resource type: {resource_type}") + + return obj.metadata.uid + + except client.exceptions.ApiException as e: + return f"Error retrieving UID for {resource_type}/{resource_name} in {namespace}: {e.reason}" + + def checkpoint_comparison(self, new_expectation): + if type(self.checkpoint) == str: + return self.checkpoint == new_expectation + + def safe_parse_solution(self, solution): + # Normalize solution to list of strings + if isinstance(solution, str): + # Check if it's a comma-separated list + # strip char before [ + if "[" in solution and "]" in solution: + solution = solution.split("[")[1] + # strip char after ] + solution = solution.split("]")[0] + if "," in solution: + # split by comma, strip space and quote + solution = [s.strip().strip("\"'") for s in solution.split(",")] + else: + 
solution = [solution.strip().strip("\"'")]
+            else:
+                solution = [solution.strip().strip("\"'")]
+        elif isinstance(solution, list):
+            # Ensure all items are strings
+            solution = [str(item) for item in solution]
+        else:
+            return None
+        return solution
+
+    def evaluate(self, solution) -> dict[str, Any]:
+        # Verify the stability of the environment before grading.
+        new_expectation = self.expect()
+        self.verify_stability(new_expectation)
+
+        # Parse the agent's solution into a list of strings.
+        solution = self.safe_parse_solution(solution)
+        if solution is None:
+            local_logger.warning("Invalid format: expected a string or a list of strings.")
+            return {
+                "success": False,
+                "accuracy": 0.0,
+                "is_subset": False,
+            }
+
+        # Compare the freshly computed expectation against the agent's solution.
+        correctness = self.compare_truth(new_expectation, solution)
+
+        local_logger.info(
+            f"Eval Diagnosis: new_expectation: {new_expectation}, solution: {solution} | {'✅' if correctness else '❌'}"
+        )
+
+        return {
+            "success": correctness,
+            "accuracy": 100.0 if correctness else 0.0,
+            "is_subset": False,  # TODO: enable subset match
+        }
+
+    ####### Helper functions ######
+    def only_pod_of_deployment_uid(self, deployment_name: str, namespace: str) -> tuple[str, str]:
+        """Return the UID and name of the only pod of a deployment. Raise an error if there is not exactly one pod."""
+        try:
+            # print("find pods for deployment", deployment_name, "in namespace", namespace)
+            pods_list = client.CoreV1Api().list_namespaced_pod(
+                namespace=namespace, label_selector=f"app={deployment_name}"
+            )
+            # print("pods_list", pods_list)
+            pods = pods_list.items  # V1PodList has an 'items' attribute containing the actual list
+
+            # TODO: use a more robust way to select the pod (OwnerReferences) or inspect the deployment spec.
+
+            # Fallback 1: use the io.kompose.service label
+            if len(pods) == 0:
+                local_logger.debug("fallback to io.kompose.service label")
+                pods_list = client.CoreV1Api().list_namespaced_pod(
+                    namespace=namespace, label_selector=f"io.kompose.service={deployment_name}"
+                )
+                # print("pods_list", pods_list)
+                pods = pods_list.items
+
+            # Fallback 2: use the opentelemetry label to select the pod
+            if len(pods) == 0:
+                local_logger.debug("fallback to opentelemetry label")
+                pods_list = client.CoreV1Api().list_namespaced_pod(
+                    namespace=namespace, label_selector=f"opentelemetry.io/name={deployment_name}"
+                )
+                # print("pods_list", pods_list)
+                pods = pods_list.items
+
+            if len(pods) > 1:
+                # print(pods)
+                raise ValueError(
+                    f"More than one pod found for deployment {deployment_name} in namespace {namespace}: {pods}, cannot evaluate diagnosis."
+                )
+            if len(pods) == 0:
+                # print(pods)
+                raise ValueError(
+                    f"No pod found for deployment {deployment_name} in namespace {namespace}, cannot evaluate diagnosis."
+ ) + return pods[0].metadata.uid, pods[0].metadata.name + except Exception as e: + raise ValueError(f"Error retrieving pod UID for deployment {deployment_name} in namespace {namespace}: {e}") + + def all_pods_of_deployment_uids(self, deployment_name: str, namespace: str) -> (list[str], list[str]): + """Return the UIDs and names of all pods of a deployment.""" + try: + pods_list = client.CoreV1Api().list_namespaced_pod( + namespace=namespace, label_selector=f"app={deployment_name}" + ) + pods = pods_list.items + if len(pods) == 0: + pods_list = client.CoreV1Api().list_namespaced_pod( + namespace=namespace, label_selector=f"io.kompose.service={deployment_name}" + ) + pods = pods_list.items + if len(pods) == 0: + pods_list = client.CoreV1Api().list_namespaced_pod( + namespace=namespace, label_selector=f"opentelemetry.io/name={deployment_name}" + ) + pods = pods_list.items + return [pod.metadata.uid for pod in pods], [pod.metadata.name for pod in pods] + except Exception as e: + raise ValueError(f"Error retrieving pods for deployment {deployment_name} in namespace {namespace}: {e}") + + def all_pods_of_daemonset_uids(self, daemonset_name: str, namespace: str) -> (list[str], list[str]): + """Return the UIDs and names of all pods of a daemonset.""" + try: + pods_list = client.CoreV1Api().list_namespaced_pod( + namespace=namespace, label_selector=f"k8s-app={daemonset_name}" + ) + pods = pods_list.items + if len(pods) == 0: + pods_list = client.CoreV1Api().list_namespaced_pod( + namespace=namespace, label_selector=f"app={daemonset_name}" + ) + pods = pods_list.items + if len(pods) == 0: + pods_list = client.CoreV1Api().list_namespaced_pod( + namespace=namespace, label_selector=f"io.kompose.service={daemonset_name}" + ) + pods = pods_list.items + if len(pods) == 0: + pods_list = client.CoreV1Api().list_namespaced_pod( + namespace=namespace, label_selector=f"opentelemetry.io/name={daemonset_name}" + ) + pods = pods_list.items + return [pod.metadata.uid for pod in pods], [pod.metadata.name for pod in pods] + except Exception as e: + raise ValueError(f"Error retrieving pods for daemonset {daemonset_name} in namespace {namespace}: {e}") + + def deployment_uid(self, deployment_name: str, namespace: str) -> str: + """Return the UID of a deployment.""" + return self.get_resource_uid("deployment", deployment_name, namespace) + + def configmap_uid(self, configmap_name: str, namespace: str) -> str: + """Return the UID of a configmap.""" + return self.get_resource_uid("configmap", configmap_name, namespace) + + def pvc_uid(self, pvc_name: str, namespace: str) -> str: + """Return the UID of a PVC.""" + return self.get_resource_uid("persistentvolumeclaim", pvc_name, namespace) + + def service_uid(self, service_name: str, namespace: str) -> str: + """Return the UID of a service.""" + return self.get_resource_uid("service", service_name, namespace) + + def memoryquota_uid(self, memoryquota_name: str, namespace: str) -> str: + """Return the UID of a memoryquota.""" + return self.get_resource_uid("memoryquota", memoryquota_name, namespace) + + def pv_uid(self, pv_name: str, namespace: str) -> str: + """Return the UID of a PV.""" + return self.get_resource_uid("persistentvolume", pv_name, namespace) + + def ingress_uid(self, ingress_name: str, namespace: str) -> str: + """Return the UID of an ingress.""" + return self.get_resource_uid("ingress", ingress_name, namespace) + + def networkpolicy_uid(self, networkpolicy_name: str, namespace: str) -> str: + """Return the UID of a networkpolicy.""" + return 
self.get_resource_uid("networkpolicy", networkpolicy_name, namespace) + + def job_uid(self, job_name: str, namespace: str) -> str: + """Return the UID of a job.""" + return self.get_resource_uid("job", job_name, namespace) + + def clusterrole_uid(self, clusterrole_name: str, namespace: str) -> str: + """Return the UID of a clusterrole.""" + return self.get_resource_uid("clusterrole", clusterrole_name, namespace) + + def clusterrolebinding_uid(self, clusterrolebinding_name: str, namespace: str) -> str: + """Return the UID of a clusterrolebinding.""" + return self.get_resource_uid("clusterrolebinding", clusterrolebinding_name, namespace) + + def owner_of_pod(self, pod_name: str, namespace: str) -> dict[str, Any] | None: + """ + Return the top-level owner (controller) of a pod using Kubernetes Owner References. + + This method follows the owner chain to find the ultimate controller: + - Pod → ReplicaSet → Deployment + - Pod → StatefulSet (direct) + - Pod → DaemonSet (direct) + - Pod → Job → CronJob (if applicable) + + Args: + pod_name: Name of the pod + namespace: Namespace of the pod + + Returns: + Dictionary with keys: 'kind', 'name', 'uid', 'api_version' + Returns None if no owner is found or pod doesn't exist + + Example: + { + 'kind': 'Deployment', + 'name': 'frontend', + 'uid': 'abc-123-def', + 'api_version': 'apps/v1' + } + """ + try: + try: + config.load_incluster_config() + except ConfigException: + config.load_kube_config() + except Exception as e: + raise RuntimeError(f"Failed to load kube config: {e}") + + core_v1 = client.CoreV1Api() + apps_v1 = client.AppsV1Api() + batch_v1 = client.BatchV1Api() + + try: + # Step 1: Get the pod + pod = core_v1.read_namespaced_pod(pod_name, namespace) + except client.exceptions.ApiException as e: + print(f"[WARN] Pod '{pod_name}' not found in namespace '{namespace}': {e.reason}") + return None + + # Step 2: Get owner references from pod + owner_refs = pod.metadata.owner_references + if not owner_refs: + print(f"[INFO] Pod '{pod_name}' has no owner references (may be manually created)") + return None + + # Step 3: Find the controller owner (controller: true) + controller_owner = None + for owner in owner_refs: + if owner.controller: + controller_owner = owner + break + + if not controller_owner: + print(f"[WARN] Pod '{pod_name}' has no controller owner") + return None + + # Step 4: Handle different owner types + owner_kind = controller_owner.kind + owner_name = controller_owner.name + owner_uid = controller_owner.uid + owner_api_version = controller_owner.api_version + + print(f"[INFO] Pod '{pod_name}' is owned by {owner_kind} '{owner_name}'") + + # Step 5: If owner is ReplicaSet, continue up to find Deployment + if owner_kind == "ReplicaSet": + try: + replicaset = apps_v1.read_namespaced_replica_set(owner_name, namespace) + rs_owner_refs = replicaset.metadata.owner_references + + if rs_owner_refs: + for rs_owner in rs_owner_refs: + if rs_owner.controller and rs_owner.kind == "Deployment": + print(f"[INFO] ReplicaSet '{owner_name}' is owned by Deployment '{rs_owner.name}'") + return { + "kind": "Deployment", + "name": rs_owner.name, + "uid": rs_owner.uid, + "api_version": rs_owner.api_version, + "intermediate_owner": {"kind": "ReplicaSet", "name": owner_name, "uid": owner_uid}, + } + + # If ReplicaSet has no owner, return ReplicaSet itself + print(f"[INFO] ReplicaSet '{owner_name}' has no owner (may be manually created)") + return {"kind": "ReplicaSet", "name": owner_name, "uid": owner_uid, "api_version": owner_api_version} + except 
client.exceptions.ApiException as e: + print(f"[WARN] ReplicaSet '{owner_name}' not found: {e.reason}") + # Fallback: return ReplicaSet info even though we can't verify it + return {"kind": "ReplicaSet", "name": owner_name, "uid": owner_uid, "api_version": owner_api_version} + + # Step 6: If owner is Job, check if it's owned by CronJob + elif owner_kind == "Job": + try: + job = batch_v1.read_namespaced_job(owner_name, namespace) + job_owner_refs = job.metadata.owner_references + + if job_owner_refs: + for job_owner in job_owner_refs: + if job_owner.controller and job_owner.kind == "CronJob": + print(f"[INFO] Job '{owner_name}' is owned by CronJob '{job_owner.name}'") + return { + "kind": "CronJob", + "name": job_owner.name, + "uid": job_owner.uid, + "api_version": job_owner.api_version, + "intermediate_owner": {"kind": "Job", "name": owner_name, "uid": owner_uid}, + } + + # If Job has no owner, return Job itself + return {"kind": "Job", "name": owner_name, "uid": owner_uid, "api_version": owner_api_version} + except client.exceptions.ApiException as e: + print(f"[WARN] Job '{owner_name}' not found: {e.reason}") + return {"kind": "Job", "name": owner_name, "uid": owner_uid, "api_version": owner_api_version} + + # Step 7: Direct owners (StatefulSet, DaemonSet, etc.) + else: + return {"kind": owner_kind, "name": owner_name, "uid": owner_uid, "api_version": owner_api_version} + + def pods_of_owner(self, owner_kind: str, owner_name: str, namespace: str) -> list[dict[str, Any]]: + """ + Find all pods owned by a specific controller (Deployment, StatefulSet, etc.). + + This is the reverse operation of owner_of_pod. It finds all pods that belong + to a given controller by following the owner chain. + + Args: + owner_kind: Kind of the owner (Deployment, StatefulSet, DaemonSet, etc.) + owner_name: Name of the owner + namespace: Namespace of the owner + + Returns: + List of pod information dictionaries, each containing: + - 'name': Pod name + - 'uid': Pod UID + - 'phase': Pod phase (Running, Pending, etc.) + + Example: + [ + { + 'name': 'frontend-abc123', + 'uid': 'pod-uid-123', + 'phase': 'Running' + }, + ... 
+ ] + """ + try: + try: + config.load_incluster_config() + except ConfigException: + config.load_kube_config() + except Exception as e: + raise RuntimeError(f"Failed to load kube config: {e}") + + core_v1 = client.CoreV1Api() + apps_v1 = client.AppsV1Api() + pods_info = [] + + try: + # Get all pods in the namespace + all_pods = core_v1.list_namespaced_pod(namespace).items + + # Handle different owner types + if owner_kind == "Deployment": + # Find ReplicaSets owned by this Deployment + rs_list = apps_v1.list_namespaced_replica_set(namespace).items + matching_rs_names = set() + + for rs in rs_list: + rs_owner_refs = rs.metadata.owner_references + if rs_owner_refs: + for rs_owner in rs_owner_refs: + if rs_owner.controller and rs_owner.kind == "Deployment" and rs_owner.name == owner_name: + matching_rs_names.add(rs.metadata.name) + + # Find pods owned by matching ReplicaSets + for pod in all_pods: + pod_owner_refs = pod.metadata.owner_references + if pod_owner_refs: + for pod_owner in pod_owner_refs: + if ( + pod_owner.controller + and pod_owner.kind == "ReplicaSet" + and pod_owner.name in matching_rs_names + ): + pods_info.append( + { + "name": pod.metadata.name, + "uid": pod.metadata.uid, + "phase": pod.status.phase, + "node_name": pod.spec.node_name if pod.spec.node_name else None, + } + ) + + elif owner_kind in ["StatefulSet", "DaemonSet"]: + # Direct ownership for StatefulSet and DaemonSet + for pod in all_pods: + pod_owner_refs = pod.metadata.owner_references + if pod_owner_refs: + for pod_owner in pod_owner_refs: + if pod_owner.controller and pod_owner.kind == owner_kind and pod_owner.name == owner_name: + pods_info.append( + { + "name": pod.metadata.name, + "uid": pod.metadata.uid, + "phase": pod.status.phase, + "node_name": pod.spec.node_name if pod.spec.node_name else None, + } + ) + + elif owner_kind == "Job": + # Direct ownership for Job + for pod in all_pods: + pod_owner_refs = pod.metadata.owner_references + if pod_owner_refs: + for pod_owner in pod_owner_refs: + if pod_owner.controller and pod_owner.kind == "Job" and pod_owner.name == owner_name: + pods_info.append( + { + "name": pod.metadata.name, + "uid": pod.metadata.uid, + "phase": pod.status.phase, + "node_name": pod.spec.node_name if pod.spec.node_name else None, + } + ) + + else: + print(f"[WARN] Unsupported owner kind: {owner_kind}") + return [] + + print(f"[INFO] Found {len(pods_info)} pod(s) owned by {owner_kind} '{owner_name}'") + return pods_info + + except Exception as e: + print(f"[ERROR] Failed to find pods for {owner_kind} '{owner_name}': {e}") + return [] diff --git a/sregym/conductor/oracles/dns_resolution_mitigation.py b/sregym/conductor/oracles/dns_resolution_mitigation.py new file mode 100644 index 0000000..c504fb8 --- /dev/null +++ b/sregym/conductor/oracles/dns_resolution_mitigation.py @@ -0,0 +1,66 @@ +from sregym.conductor.oracles.base import Oracle + + +class DNSResolutionMitigationOracle(Oracle): + + importance = 1.0 + + def evaluate(self) -> dict: + print("== DNS Resolution Mitigation Check ==") + + kubectl = self.problem.kubectl + namespace = self.problem.namespace + faulty_service = self.problem.faulty_service + + service_names = [svc.metadata.name for svc in kubectl.list_services(namespace).items] + + if faulty_service == None: + faulty_service = service_names[0] + + # Get the service's selector + command = f"kubectl get service {faulty_service} -n {namespace} -o jsonpath='{{.spec.selector}}'" + selector_output = kubectl.exec_command(command).strip() + + # Parse selector to label query + 
selector_dict = eval(selector_output) + label_selector = ",".join(f"{k}={v}" for k, v in selector_dict.items()) + + # Get pod names using the selector + command = f"kubectl get pods -n {namespace} -l {label_selector} -o jsonpath='{{.items[*].metadata.name}}'" + pod_names = kubectl.exec_command(command).strip().split() + + target_pod = pod_names[0] + + if not pod_names: + print("❌ No running pod found for the faulty service(s).") + return {"success": False} + else: + + failing = [] + + for svc in service_names: + try: + command = ( + f"kubectl exec -n {namespace} {target_pod} -- getent hosts {svc}.{namespace}.svc.cluster.local" + ) + output = kubectl.exec_command(command) + is_success = "exit code" not in output + + if not is_success: + failing.append(svc) + print(f"[❌] Failed DNS Resolution: {svc}") + else: + print(f"[✅] Successfully resolved DNS for {svc}") + + except Exception as e: + print(f"Error execing getent hosts in pod {target_pod}: {e}") + failing.append(svc) + + if failing: + print( + f"[❌] Faulty Service: {faulty_service} | Failed DNS Resolutions from target pod {target_pod}: {', '.join(failing)}" + ) + return {"success": False} + + print(f"[✅] All service names resolved inside target pod {target_pod}") + return {"success": True} diff --git a/sregym/conductor/oracles/imbalance_mitigation.py b/sregym/conductor/oracles/imbalance_mitigation.py new file mode 100644 index 0000000..b99e046 --- /dev/null +++ b/sregym/conductor/oracles/imbalance_mitigation.py @@ -0,0 +1,49 @@ +import time + +from sregym.conductor.oracles.base import Oracle + + +class ImbalanceMitigationOracle(Oracle): + importance = 1.0 + RETRIES = 5 + + def __init__(self, problem): + super().__init__(problem) + + def evaluate(self) -> dict: + print("== Mitigation Evaluation ==") + + kubectl = self.problem.kubectl + namespace = self.problem.namespace + deployment_names = self.problem.faulty_service + results = {} + + results["success"] = True + + for deployment_name in deployment_names: + for _ in range(self.RETRIES): + usage_dict = kubectl.get_pod_cpu_usage(namespace) + + usages = [] + for pod_name, cpu_usage in usage_dict.items(): + if deployment_name in pod_name and "frontend-proxy" not in pod_name: + usages.append(int(cpu_usage)) + max_usage = max(usages) + if len(usages) == 1: + print("Wait the top info to be ready...") + time.sleep(10) + continue + average_others = (sum(usages) - max_usage) / (len(usages) - 1) + + if max_usage > average_others * 3: + print( + f"❌ Deployment {deployment_name} still not balanced (max usage: {max_usage}, average others: {average_others})" + ) + results["success"] = False + return results + + time.sleep(10) # wait for variation + + print(f"✅ Deployment {deployment_name} balanced") + results["success"] = True + return results diff --git a/sregym/conductor/oracles/incorrect_image_mitigation.py b/sregym/conductor/oracles/incorrect_image_mitigation.py new file mode 100644 index 0000000..25c7c73 --- /dev/null +++ b/sregym/conductor/oracles/incorrect_image_mitigation.py @@ -0,0 +1,33 @@ +from sregym.conductor.oracles.base import Oracle + + +class IncorrectImageMitigationOracle(Oracle): + importance = 1.0 + + def __init__(self, problem, actual_images: dict = {}): + super().__init__(problem) + self.actual_images = actual_images + + def evaluate(self) -> dict: + print("== Mitigation Evaluation ==") + + kubectl = self.problem.kubectl + namespace = self.problem.namespace + deployment_names = self.problem.faulty_service + results = {} + + results["success"] = True + + for deployment_name in 
deployment_names: + # Fetch the current deployment + deployment = kubectl.get_deployment(deployment_name, namespace) + container = deployment.spec.template.spec.containers[0] + actual_image = container.image + + if actual_image == self.actual_images[deployment_name]: + print(f"❌ Deployment {deployment_name} still using incorrect image: {actual_image}") + results["success"] = False + else: + print(f"✅ Deployment {deployment_name} using correct image: {actual_image}") + + return results diff --git a/sregym/conductor/oracles/incorrect_port.py b/sregym/conductor/oracles/incorrect_port.py new file mode 100644 index 0000000..97bd40f --- /dev/null +++ b/sregym/conductor/oracles/incorrect_port.py @@ -0,0 +1,43 @@ +from sregym.conductor.oracles.base import Oracle + + +class IncorrectPortAssignmentMitigationOracle(Oracle): + importance = 1.0 + + def evaluate(self) -> dict: + print("== Mitigation Evaluation ==") + + kubectl = self.problem.kubectl + namespace = self.problem.namespace + deployment_name = self.problem.faulty_service + env_var = self.problem.env_var + expected_port = self.problem.correct_port + results = {} + + # Fetch deployment + deployment = kubectl.get_deployment(deployment_name, namespace) + container = deployment.spec.template.spec.containers[0] + + found = False + correct = False + + for e in container.env: + if e.name == env_var: + found = True + value = e.value + print(f"🔍 Found {env_var}={value}") + parts = value.split(":") + if len(parts) == 2 and parts[1] == expected_port: + correct = True + else: + print( + f"❌ Incorrect port: expected {expected_port}, found {parts[1] if len(parts) == 2 else 'N/A'}" + ) + break + + if not found: + print(f"❌ Env var {env_var} not found.") + results["success"] = found and correct + + print(f"Mitigation Result: {'Pass ✅' if results['success'] else 'Fail ❌'}") + return results diff --git a/sregym/conductor/oracles/ingress_misroute_oracle.py b/sregym/conductor/oracles/ingress_misroute_oracle.py new file mode 100644 index 0000000..1909001 --- /dev/null +++ b/sregym/conductor/oracles/ingress_misroute_oracle.py @@ -0,0 +1,31 @@ +import logging +from kubernetes import client +from sregym.conductor.oracles.base import Oracle + +class IngressMisrouteMitigationOracle(Oracle): + def __init__(self, problem): + super().__init__(problem=problem) + self.networking_v1 = client.NetworkingV1Api() + self.logger = logging.getLogger(__name__) + + def evaluate(self) -> bool: + results = {} + try: + ingress = self.networking_v1.read_namespaced_ingress(name=self.problem.ingress_name, namespace=self.problem.namespace) + for rule in ingress.spec.rules: + for path in rule.http.paths: + if path.path == self.problem.path: + if path.backend.service.name == self.problem.correct_service: + self.logger.info(f"Ingress path '{self.problem.path}' correctly routed to '{self.problem.correct_service}'.") + results["success"] = True + return results + else: + self.logger.info(f"Ingress path '{self.problem.path}' still routed to '{path.backend.service.name}', mitigation incomplete.") + results["success"] = False + return results + self.logger.error("Path not found in ingress, mitigation incomplete.") + results["success"] = False + except client.exceptions.ApiException as e: + self.logger.error(f"Error checking ingress configuration: {e}") + results["success"] = False + return results diff --git a/sregym/conductor/oracles/llm_as_a_judge/judge.py b/sregym/conductor/oracles/llm_as_a_judge/judge.py new file mode 100644 index 0000000..fee416d --- /dev/null +++ 
b/sregym/conductor/oracles/llm_as_a_judge/judge.py @@ -0,0 +1,244 @@ +"""LLM-as-a-Judge Oracle for evaluating agent solutions against expected root causes.""" + +import json +import os +import re +from enum import Enum +from pathlib import Path +from typing import Optional + +import yaml +from dotenv import load_dotenv +from langchain_core.messages import HumanMessage, SystemMessage + +from clients.stratus.llm_backend.get_llm_backend import LiteLLMBackend + +load_dotenv() + + +class JudgmentResult(str, Enum): + TRUE = "True" # Correct diagnosis + FALSE = "False" # Incorrect diagnosis + FALSE_POSITIVE = "FalsePositive" # Identified a problem when there isn't one + FALSE_NEGATIVE = "FalseNegative" # Missed a problem that exists + + +class LLMJudge: + def __init__( + self, + provider: Optional[str] = None, + model_name: Optional[str] = None, + url: Optional[str] = None, + api_key: Optional[str] = None, + temperature: float = 0.0, + max_tokens: int = 4096, + ): + # Load from environment if not provided + self.provider = provider or os.getenv("PROVIDER", "openai") + self.model_name = model_name or os.getenv("MODEL_TOOLS", "gpt-4o") + self.url = url or os.getenv("URL_TOOLS", "") + self.api_key = api_key or os.getenv("API_KEY_TOOLS", "") + self.temperature = temperature + self.max_tokens = max_tokens + + # Initialize LiteLLM backend + self.backend = LiteLLMBackend( + provider=self.provider, + model_name=self.model_name, + url=self.url, + api_key=self.api_key, + api_version=os.getenv("API_VERSION_TOOLS", ""), + seed=int(os.getenv("SEED_TOOLS", "42")), + top_p=float(os.getenv("TOP_P_TOOLS", "0.95")), + temperature=self.temperature, + reasoning_effort="", + thinking_tools="", + thinking_budget_tools=0, + max_tokens=self.max_tokens, + ) + + print(f"Initialized LLMJudge with provider={self.provider}, model={self.model_name}") + + def judge(self, solution: str, expectation: str) -> JudgmentResult: + system_prompt = """You are an expert judge evaluating whether an agent's diagnosis of a system issue matches the expected root cause. + +Your task is to compare the agent's answer with the expected root cause and determine if they are semantically equivalent. + +Classification criteria: +- **True**: The agent correctly identified the root cause. The diagnosis captures the essential problem even if worded differently. +- **False**: The agent identified a different problem or misdiagnosed the root cause. +- **FalsePositive**: The expected root cause is empty (no fault exists), but the agent reported a problem. +- **FalseNegative**: The expected root cause describes a real fault, but the agent reported no issues or said everything is normal. + +You must respond with EXACTLY ONE of these four values: True, False, FalsePositive, or FalseNegative + +Your response should be in the following JSON format: +{ + "judgment": "True|False|FalsePositive|FalseNegative", + "reasoning": "Brief explanation of why you made this judgment" +}""" + + user_prompt = f"""Expected Root Cause: +{expectation if expectation else "(No fault - system is operating normally)"} + +Agent's Answer: +{solution} + +Evaluate whether the agent's answer correctly identifies the root cause. 
Respond in JSON format with your judgment and reasoning.""" + + messages = [ + SystemMessage(content=system_prompt), + HumanMessage(content=user_prompt), + ] + + try: + # Get response from LLM + response = self.backend.inference(messages) + response_text = response.content.strip() + + print(f"LLM Response: {response_text}") + + # Parse the response + judgment = self._parse_judgment(response_text) + print(f"Parsed judgment: {judgment}") + + return judgment + + except Exception as e: + print(f"Error during judgment: {e}") + raise + + def _parse_judgment(self, response_text: str) -> JudgmentResult: + try: + # Remove markdown code blocks if present + clean_text = re.sub(r"```json\s*|\s*```", "", response_text) + clean_text = clean_text.strip() + + response_json = json.loads(clean_text) + judgment_str = response_json.get("judgment", "").strip() + reasoning = response_json.get("reasoning", "") + + print(f"Reasoning: {reasoning}") + + except json.JSONDecodeError: + # Fallback: try to extract judgment directly from text + print("Failed to parse JSON, attempting direct extraction") + judgment_str = response_text + + # Normalize the judgment string + judgment_str = judgment_str.strip().lower() + + # Map to JudgmentResult + if judgment_str == "true": + return JudgmentResult.TRUE + elif judgment_str == "false": + return JudgmentResult.FALSE + elif judgment_str in ["falsepositive", "false positive"]: + return JudgmentResult.FALSE_POSITIVE + elif judgment_str in ["falsenegative", "false negative"]: + return JudgmentResult.FALSE_NEGATIVE + else: + raise ValueError(f"Could not parse judgment from response: {response_text}") + + +def load_test_data(yaml_path: str) -> list[dict]: + with open(yaml_path, "r") as f: + data = yaml.safe_load(f) + return data + + +def main(): + # Get the directory of this script + script_dir = Path(__file__).parent + data_path = script_dir / "data.yaml" + + if not data_path.exists(): + print(f"Test data file not found: {data_path}") + return + + # Load test data + test_cases = load_test_data(str(data_path)) + print(f"Loaded {len(test_cases)} test cases from {data_path}") + + # Initialize judge + judge = LLMJudge() + + # Track results + total_cases = len(test_cases) + correct = 0 + incorrect = 0 + results = [] + + # Evaluate each test case + for i, test_case in enumerate(test_cases, 1): + description = test_case.get("description", "") + answer = test_case.get("answer", "") + expected_judgment = test_case.get("oracle", "") + + print(f"\n{'='*80}") + print(f"Test Case {i}/{total_cases}") + print( + f"Expected Root Cause: {description[:100]}..." + if len(description) > 100 + else f"Expected Root Cause: {description}" + ) + print(f"Agent Answer: {answer[:100]}..." 
if len(answer) > 100 else f"Agent Answer: {answer}") + print(f"Expected Judgment: {expected_judgment}") + + try: + # Get judgment from LLM + actual_judgment = judge.judge(solution=answer, expectation=description) + + # Normalize expected judgment for comparison + expected_normalized = expected_judgment.strip().lower().replace(" ", "") + actual_normalized = actual_judgment.value.lower().replace(" ", "") + + is_correct = expected_normalized == actual_normalized + + if is_correct: + correct += 1 + status = "✅ CORRECT" + else: + incorrect += 1 + status = "❌ INCORRECT" + + print(f"Actual Judgment: {actual_judgment.value}") + print(f"Status: {status}") + + results.append( + { + "test_case": i, + "expected": expected_judgment, + "actual": actual_judgment.value, + "correct": is_correct, + } + ) + + except Exception as e: + print(f"Error processing test case {i}: {e}") + incorrect += 1 + results.append( + { + "test_case": i, + "expected": expected_judgment, + "actual": f"ERROR: {str(e)}", + "correct": False, + } + ) + + # Print summary + print(f"\n{'='*80}") + print("SUMMARY") + print(f"{'='*80}") + print(f"Total test cases: {total_cases}") + print(f"Correct: {correct} ({correct/total_cases*100:.1f}%)") + print(f"Incorrect: {incorrect} ({incorrect/total_cases*100:.1f}%)") + print(f"\nDetailed Results:") + + for result in results: + status_symbol = "✅" if result["correct"] else "❌" + print(f" {status_symbol} Case {result['test_case']}: Expected={result['expected']}, Actual={result['actual']}") + + +if __name__ == "__main__": + main() diff --git a/sregym/conductor/oracles/llm_as_a_judge/llm_as_a_judge_oracle.py b/sregym/conductor/oracles/llm_as_a_judge/llm_as_a_judge_oracle.py new file mode 100644 index 0000000..4fb1865 --- /dev/null +++ b/sregym/conductor/oracles/llm_as_a_judge/llm_as_a_judge_oracle.py @@ -0,0 +1,75 @@ +"""LLM-as-a-Judge Oracle for evaluating agent solutions using LLM judgment.""" + +from typing import Optional + +from sregym.conductor.oracles.base import Oracle +from sregym.conductor.oracles.llm_as_a_judge.judge import JudgmentResult, LLMJudge + + +class LLMAsAJudgeOracle(Oracle): + """Oracle that uses an LLM judge to evaluate agent solutions against expected root causes.""" + + def __init__( + self, + problem, + expected: str, + provider: Optional[str] = None, + model_name: Optional[str] = None, + url: Optional[str] = None, + api_key: Optional[str] = None, + temperature: float = 0.0, + max_tokens: int = 4096, + ): + super().__init__(problem) + self.expected = expected if expected else "" + + # Initialize the LLM judge + self.judge = LLMJudge( + provider=provider, + model_name=model_name, + url=url, + api_key=api_key, + temperature=temperature, + max_tokens=max_tokens, + ) + + def evaluate(self, solution) -> dict: + print("== LLM-as-a-Judge Evaluation ==") + results = {} + + # Normalize solution to string + if not isinstance(solution, str): + solution = str(solution) + + try: + # Get judgment from LLM judge + judgment = self.judge.judge(solution=solution, expectation=self.expected) + + # Determine success based on judgment + is_correct = judgment == JudgmentResult.TRUE + + if is_correct: + acc = 100.0 + print(f"✅ Correct diagnosis: {judgment.value}") + else: + acc = 0.0 + print(f"❌ Incorrect diagnosis: {judgment.value}") + print( + f" Expected: {self.expected[:100]}..." + if len(self.expected) > 100 + else f" Expected: {self.expected}" + ) + print(f" Got: {solution[:100]}..." 
if len(solution) > 100 else f" Got: {solution}") + + results["judgment"] = judgment.value + results["success"] = is_correct + results["accuracy"] = acc + + except Exception as e: + print(f"❌ Error during LLM judgment: {e}") + results["judgment"] = "Error" + results["success"] = False + results["accuracy"] = 0.0 + results["error"] = str(e) + + return results diff --git a/sregym/conductor/oracles/localization.py b/sregym/conductor/oracles/localization.py new file mode 100644 index 0000000..328642b --- /dev/null +++ b/sregym/conductor/oracles/localization.py @@ -0,0 +1,61 @@ +from sregym.conductor.oracles.base import Oracle +from sregym.conductor.oracles.utils import is_exact_match, is_subset + + +class LocalizationOracle(Oracle): + def __init__(self, problem, expected: list[str] | str): + super().__init__(problem) + # Normalize expected to list of strings + if isinstance(expected, str): + self.expected = [expected] + elif isinstance(expected, list): + # Flatten if nested and ensure all items are strings + flattened = [] + for item in expected: + if isinstance(item, list): + flattened.extend([str(x) for x in item]) + else: + flattened.append(str(item)) + self.expected = flattened + else: + self.expected = [str(expected)] + + def evaluate(self, solution) -> dict: + print("== Localization Evaluation ==") + results = {} + + # Normalize solution to list of strings + if isinstance(solution, str): + # Check if it's a comma-separated list + if "," in solution: + solution = [s.strip() for s in solution.split(",")] + else: + solution = [solution] + elif isinstance(solution, list): + # Ensure all items are strings + solution = [str(item) for item in solution] + else: + results["accuracy"] = 0.0 + results["success"] = False + results["is_subset"] = False + print("❌ Invalid format: expected string or list of strings") + return results + + is_exact = is_exact_match(solution, self.expected) + is_sub = is_subset(solution, self.expected) + + if is_exact: + acc = 100.0 + print(f"✅ Exact match: {solution}") + elif is_sub: + acc = (len(solution) / len(self.expected)) * 100.0 + print(f"⚠️ Subset match: {solution} | Accuracy: {acc:.2f}%") + else: + acc = 0.0 + print(f"❌ No match: {solution}") + + results["accuracy"] = acc + results["success"] = is_exact or (is_sub and len(solution) == len(self.expected)) + results["is_subset"] = is_sub + + return results diff --git a/sregym/conductor/oracles/missing_cm_key_mitigation.py b/sregym/conductor/oracles/missing_cm_key_mitigation.py new file mode 100644 index 0000000..2e044af --- /dev/null +++ b/sregym/conductor/oracles/missing_cm_key_mitigation.py @@ -0,0 +1,51 @@ +import json +import yaml +from typing import Optional + +from sregym.conductor.oracles.base import Oracle + + +class MissingCmKeyMitigationOracle(Oracle): + + importance = 1.0 + + def __init__(self, problem, configmap_name: str, expected_keys: list[str]): + + super().__init__(problem) + self.expected_keys = expected_keys + self.configmap_name = configmap_name + + def evaluate(self) -> dict: + print("== Missing ConfigMap Key Mitigation Evaluation ==") + + kubectl = self.problem.kubectl + namespace = self.problem.namespace + + try: + cm_yaml = kubectl.exec_command( + f"kubectl get configmap {self.configmap_name} -n {namespace} -o yaml" + ) + cm_data = yaml.safe_load(cm_yaml) + + config_json_str = cm_data.get("data", {}).get("config.json", "{}") + config_data = json.loads(config_json_str) + + missing_keys = [] + present_keys = [] + + for key in self.expected_keys: + if key in config_data: + 
present_keys.append(key) + else: + missing_keys.append(key) + + if missing_keys: + print(f"❌ Missing ConfigMap keys: {missing_keys}") + return {"success": False} + else: + print(f"✅ All expected ConfigMap keys present.") + return {"success": True} + + except Exception as e: + print(f"❌ Failed to check ConfigMap: {str(e)}") + return {"success": False} \ No newline at end of file diff --git a/sregym/conductor/oracles/missing_env_variable_mitigation.py b/sregym/conductor/oracles/missing_env_variable_mitigation.py new file mode 100644 index 0000000..6cd733c --- /dev/null +++ b/sregym/conductor/oracles/missing_env_variable_mitigation.py @@ -0,0 +1,71 @@ +from sregym.conductor.oracles.base import Oracle + + +class MissingEnvVariableMitigationOracle(Oracle): + + def evaluate(self) -> dict: + print("== Mitigation Evaluation ==") + + kubectl = self.problem.kubectl + namespace = self.problem.namespace + results = {} + + faulty_service = self.problem.faulty_service + env_var = self.problem.env_var + env_var_value = self.problem.env_var_value + + all_normal = False + # check if deployment exists + try: + deployment = kubectl.get_deployment(faulty_service, namespace) + env_var_found = False + + # check if env var exists in deployment + for container in deployment.spec.template.spec.containers: + if hasattr(container, 'env') and container.env: + for env in container.env: + if env.name == env_var and env.value == env_var_value: + print(f"✅ Found environment variable {env_var}={env_var_value} in container {container.name}") + env_var_found = True + break + if env_var_found: + break + + if not env_var_found: + print(f"❌ Failed to find environment variable {env_var}={env_var_value} in deployment {faulty_service}") + + all_normal = env_var_found + + except Exception as e: + print(f"❌ Failed to get deployment {faulty_service}: {e}") + all_normal = False + + if all_normal: + pod_list = kubectl.list_pods(namespace) + + for pod in pod_list.items: + if pod.status.phase != "Running": + print(f"❌ Pod {pod.metadata.name} is in phase: {pod.status.phase}") + all_normal = False + break + + for container_status in pod.status.container_statuses: + if container_status.state.waiting and container_status.state.waiting.reason: + print(f"❌ Container {container_status.name} is waiting: {container_status.state.waiting.reason}") + all_normal = False + elif container_status.state.terminated and container_status.state.terminated.reason != "Completed": + print( + f"❌ Container {container_status.name} terminated: {container_status.state.terminated.reason}" + ) + all_normal = False + elif not container_status.ready: + print(f"⚠️ Container {container_status.name} is not ready") + all_normal = False + + if not all_normal: + break + + results["success"] = all_normal + + print(f"Mitigation Result: {'✅ Pass' if results['success'] else '❌ Fail'}") + return results \ No newline at end of file diff --git a/sregym/conductor/oracles/mitigation.py b/sregym/conductor/oracles/mitigation.py new file mode 100644 index 0000000..04e6498 --- /dev/null +++ b/sregym/conductor/oracles/mitigation.py @@ -0,0 +1,40 @@ +from sregym.conductor.oracles.base import Oracle + + +class MitigationOracle(Oracle): + importance = 1.0 + + def evaluate(self) -> dict: + print("== Mitigation Evaluation ==") + + kubectl = self.problem.kubectl + namespace = self.problem.namespace + results = {} + + pod_list = kubectl.list_pods(namespace) + all_normal = True + + for pod in pod_list.items: + if pod.status.phase != "Running": + print(f"❌ Pod {pod.metadata.name} is in phase: 
{pod.status.phase}") + all_normal = False + break + + for container_status in pod.status.container_statuses: + if container_status.state.waiting and container_status.state.waiting.reason: + print(f"❌ Container {container_status.name} is waiting: {container_status.state.waiting.reason}") + all_normal = False + elif container_status.state.terminated and container_status.state.terminated.reason != "Completed": + print( + f"❌ Container {container_status.name} terminated: {container_status.state.terminated.reason}" + ) + all_normal = False + elif not container_status.ready: + print(f"⚠️ Container {container_status.name} is not ready") + all_normal = False + + if not all_normal: + break + + results["success"] = all_normal + return results diff --git a/sregym/conductor/oracles/namespace_memory_limit_mitigation.py b/sregym/conductor/oracles/namespace_memory_limit_mitigation.py new file mode 100644 index 0000000..05227da --- /dev/null +++ b/sregym/conductor/oracles/namespace_memory_limit_mitigation.py @@ -0,0 +1,41 @@ +from sregym.conductor.oracles.base import Oracle + + +class NamespaceMemoryLimitMitigationOracle(Oracle): + importance = 1.0 + + def evaluate(self) -> dict: + print("== Mitigation Evaluation ==") + kubectl = self.problem.kubectl + namespace = self.problem.namespace + deployment_name = self.problem.faulty_service + results = {} + + # 1. Check if memory-based ResourceQuota is gone + memory_quota_removed = True + quotas = kubectl.get_resource_quotas(namespace) + for quota in quotas: + if "memory" in quota.spec.hard: + print(f"❌ Memory quota still present: {quota.metadata.name}") + memory_quota_removed = False + + # 2. Check if pod for the deployment is Running and Ready + pod_list = kubectl.list_pods(namespace) + pod_recovered = False + + for pod in pod_list.items: + if deployment_name in pod.metadata.name and pod.status.phase == "Running": + all_ready = all(container.ready for container in pod.status.container_statuses) + if all_ready: + pod_recovered = True + break + else: + print(f"⚠️ Pod {pod.metadata.name} is Running but containers not ready") + elif deployment_name in pod.metadata.name: + print(f"❌ Pod {pod.metadata.name} is not in Running phase: {pod.status.phase}") + + success = memory_quota_removed and pod_recovered + results["success"] = success + + print(f"Mitigation Result: {'✅ Pass' if success else '❌ Fail'}") + return results diff --git a/sregym/conductor/oracles/network_policy_oracle.py b/sregym/conductor/oracles/network_policy_oracle.py new file mode 100644 index 0000000..cff6754 --- /dev/null +++ b/sregym/conductor/oracles/network_policy_oracle.py @@ -0,0 +1,29 @@ +import logging + +from kubernetes import client + +from sregym.conductor.oracles.base import Oracle + + +class NetworkPolicyMitigationOracle(Oracle): + def __init__(self, problem, policy_name=None): + super().__init__(problem=problem) + self.networking_v1 = client.NetworkingV1Api() + self.policy_name = policy_name or f"deny-all-{problem.faulty_service}" + self.logger = logging.getLogger(__name__) + + def evaluate(self) -> bool: + results = {} + try: + self.networking_v1.read_namespaced_network_policy(name=self.policy_name, namespace=self.problem.namespace) + # Policy still exists, mitigation incomplete + self.logger.info(f"NetworkPolicy '{self.policy_name}' still present, mitigation not complete.") + results["success"] = False + except client.exceptions.ApiException as e: + if e.status == 404: + self.logger.info(f"NetworkPolicy '{self.policy_name}' not found, mitigation successful.") + results["success"] = 
True + else: + self.logger.error(f"Error checking NetworkPolicy: {e}") + results["success"] = False + return results diff --git a/sregym/conductor/oracles/operator_misoperation/invalid_affinity_mitigation.py b/sregym/conductor/oracles/operator_misoperation/invalid_affinity_mitigation.py new file mode 100644 index 0000000..dc29747 --- /dev/null +++ b/sregym/conductor/oracles/operator_misoperation/invalid_affinity_mitigation.py @@ -0,0 +1,123 @@ +import json +import yaml +import tempfile +from sregym.conductor.oracles.base import Oracle + +class InvalidAffinityMitigationOracle(Oracle): + def __init__(self, problem, deployment_name: str): + super().__init__(problem) + self.cr_name = "basic" + + self.deployment_name = deployment_name + self.namespace = problem.namespace + self.kubectl = problem.kubectl + + def evaluatePods(self) -> dict: + print("== Evaluating pod readiness ==") + try: + output = self.kubectl.exec_command( + f"kubectl get pods -n {self.namespace} -o yaml" + ) + pods = yaml.safe_load(output) + pods_list = pods.get("items", []) + pod_statuses = {} + for pod in pods_list: + pod_name = pod["metadata"]["name"] + container_status = pod["status"].get("containerStatuses", []) + if container_status: + state = container_status[0].get("state", {}) + if "waiting" in state: + reason = state["waiting"].get("reason", "Unknown") + pod_statuses[pod_name] = reason + elif "running" in state: + pod_statuses[pod_name] = "Running" + else: + pod_statuses[pod_name] = "Terminated" + else: + pod_statuses[pod_name] = "No Status" + + print("Pod Statuses:") + for pod, status in pod_statuses.items(): + print(f" - {pod}: {status}") + if status != "Running": + print(f"Pod {pod} is not running. Status: {status}") + return {"success": False} + print("All pods are running.") + return {"success": True} + except Exception as e: + print(f"Error during evaluation: {str(e)}") + return {"success": False} + + + + def evaluate(self) -> dict: + evaluatePods = self.evaluatePods() + print(f"Pod readiness: {evaluatePods}") + ns = self.namespace + name = "basic" + + cr_json = json.loads(self.kubectl.exec_command( + f"kubectl get tidbcluster {name} -n tidb-cluster -o json" + )) + cr_effects = [ + t.get("effect") + for t in (cr_json.get("spec", {}).get("tidb", {}).get("tolerations", []) or []) + if isinstance(t, dict) + ] + + + try: + sts_json = json.loads(self.kubectl.exec_command( + f"kubectl get sts {name}-tidb -n {ns} -o json" + )) + tpl_tolerations = (sts_json.get("spec", {}) + .get("template", {}) + .get("spec", {}) + .get("tolerations", []) or []) + sts_effects = [t.get("effect") for t in tpl_tolerations if isinstance(t, dict)] + except Exception: + sts_json = {} + sts_effects = [] + + + pod_effects = [] + try: + pods_json = json.loads(self.kubectl.exec_command( + f"kubectl get pods -n {ns} -l app.kubernetes.io/instance={name},app.kubernetes.io/component=tidb -o json" + )) + for item in pods_json.get("items", []): + tol = (item.get("spec", {}) or {}).get("tolerations", []) or [] + pod_effects.extend([t.get("effect") for t in tol if isinstance(t, dict)]) + except Exception: + pods_json = {} + + try: + ev = self.kubectl.exec_command(f"kubectl get events -n {ns} --sort-by=.metadata.creationTimestamp | tail -n 30") + except Exception: + ev = "" + + bad = "TAKE_SOME_EFFECT" + applied_in = { + "cr_has_bad_effect": (bad in cr_effects), + "sts_has_bad_effect": (bad in sts_effects), + "any_pod_has_bad_effect": (bad in pod_effects), + } + success = not any(applied_in.values()) + + return { + "success": success, + "details": { + 
"cr_effects": cr_effects, + "sts_effects": sts_effects, + "pod_effects": pod_effects, + "recent_events_tail": ev[-2000:], + }, + "applied_in": applied_in + } + + + + + + + \ No newline at end of file diff --git a/sregym/conductor/oracles/operator_misoperation/non_existent_storage_mitigation.py b/sregym/conductor/oracles/operator_misoperation/non_existent_storage_mitigation.py new file mode 100644 index 0000000..a521f7a --- /dev/null +++ b/sregym/conductor/oracles/operator_misoperation/non_existent_storage_mitigation.py @@ -0,0 +1,113 @@ +import json +import yaml +import tempfile +from sregym.conductor.oracles.base import Oracle + +class NonExistentStorageClassMitigationOracle(Oracle): + def __init__(self, problem, deployment_name: str): + super().__init__(problem) + self.cr_name = "basic" + + self.deployment_name = deployment_name + self.namespace = problem.namespace + self.kubectl = problem.kubectl + + def evaluatePods(self) -> dict: + print("== Evaluating pod readiness ==") + try: + output = self.kubectl.exec_command( + f"kubectl get pods -n {self.namespace} -o yaml" + ) + pods = yaml.safe_load(output) + pods_list = pods.get("items", []) + pod_statuses = {} + for pod in pods_list: + pod_name = pod["metadata"]["name"] + container_status = pod["status"].get("containerStatuses", []) + if container_status: + state = container_status[0].get("state", {}) + if "waiting" in state: + reason = state["waiting"].get("reason", "Unknown") + pod_statuses[pod_name] = reason + elif "running" in state: + pod_statuses[pod_name] = "Running" + else: + pod_statuses[pod_name] = "Terminated" + else: + pod_statuses[pod_name] = "No Status" + + print("Pod Statuses:") + for pod, status in pod_statuses.items(): + print(f" - {pod}: {status}") + if status != "Running": + print(f"Pod {pod} is not running. 
Status: {status}") + return {"success": False} + print("All pods are running.") + return {"success": True} + except Exception as e: + print(f"Error during evaluation: {str(e)}") + return {"success": False} + + + + def evaluate(self) -> dict: + ns = self.namespace + name = self.cr_name + results = {} + evaluatePods = self.evaluatePods() + print(f"Pod readiness: {evaluatePods}") + + cr = json.loads(self.kubectl.exec_command( + f"kubectl get tidbcluster {name} -n tidb-cluster -o json" + )) + pd_sc = (cr.get("spec", {}).get("pd", {}) or {}).get("storageClassName") + tikv_sc = (cr.get("spec", {}).get("tikv", {}) or {}).get("storageClassName") + + pvc_pd_json = json.loads(self.kubectl.exec_command( + f"kubectl get pvc -n {ns} " + f"-l app.kubernetes.io/instance={name},app.kubernetes.io/component=pd -o json" + )) + pvc_tikv_json = json.loads(self.kubectl.exec_command( + f"kubectl get pvc -n {ns} " + f"-l app.kubernetes.io/instance={name},app.kubernetes.io/component=tikv -o json" + )) + + def summarize_pvcs(pvc_list): + out = [] + for p in pvc_list.get("items", []): + meta = p.get("metadata", {}) or {} + spec = p.get("spec", {}) or {} + stat = p.get("status", {}) or {} + out.append({ + "name": meta.get("name"), + "storageClassName": spec.get("storageClassName"), + "phase": stat.get("phase"), + }) + return out + + pvc_pd = summarize_pvcs(pvc_pd_json) + pvc_tikv = summarize_pvcs(pvc_tikv_json) + + events_tail = self.kubectl.exec_command( + f"kubectl get events -n {ns} --sort-by=.metadata.creationTimestamp | tail -n 50" + ) + + BAD = "ThisIsAStorageClass" + cr_has_bad = (pd_sc == BAD) or (tikv_sc == BAD) + pvc_shows_bad = any(e.get("storageClassName") == BAD for e in pvc_pd + pvc_tikv) + any_pending = any(e.get("phase") == "Pending" for e in pvc_pd + pvc_tikv) + print (f"cr_has_bad: {cr_has_bad}, pvc_shows_bad: {pvc_shows_bad}, any_pending: {any_pending}") + + fault_applied = cr_has_bad or pvc_shows_bad + success = not fault_applied + + return { + "success": success, + "cr_values": {"pd.storageClassName": pd_sc, "tikv.storageClassName": tikv_sc}, + "pvc_pd": pvc_pd, + "pvc_tikv": pvc_tikv, + "any_pvc_pending": any_pending, + "events_tail": events_tail[-2000:], + "fault_applied": fault_applied + } + \ No newline at end of file diff --git a/sregym/conductor/oracles/operator_misoperation/overload_replicas_mitigation.py b/sregym/conductor/oracles/operator_misoperation/overload_replicas_mitigation.py new file mode 100644 index 0000000..2bfc598 --- /dev/null +++ b/sregym/conductor/oracles/operator_misoperation/overload_replicas_mitigation.py @@ -0,0 +1,100 @@ +import json +import yaml +import tempfile +from sregym.conductor.oracles.base import Oracle + +class OverloadReplicasMitigationOracle(Oracle): + def __init__(self, problem, deployment_name: str): + super().__init__(problem) + self.cr_name = "basic" + self.deployment_name = deployment_name + self.namespace = "tidb-cluster" + self.kubectl = problem.kubectl + + def evaluatePods(self) -> dict: + print("== Evaluating pod readiness ==") + try: + output = self.kubectl.exec_command( + f"kubectl get pods -n {self.namespace} -o yaml" + ) + pods = yaml.safe_load(output) + pods_list = pods.get("items", []) + pod_statuses = {} + for pod in pods_list: + pod_name = pod["metadata"]["name"] + container_status = pod["status"].get("containerStatuses", []) + if container_status: + state = container_status[0].get("state", {}) + if "waiting" in state: + reason = state["waiting"].get("reason", "Unknown") + pod_statuses[pod_name] = reason + elif "running" in state: + 
pod_statuses[pod_name] = "Running" + else: + pod_statuses[pod_name] = "Terminated" + else: + pod_statuses[pod_name] = "No Status" + + print("Pod Statuses:") + for pod, status in pod_statuses.items(): + print(f" - {pod}: {status}") + if status != "Running": + print(f"Pod {pod} is not running. Status: {status}") + return {"success": False} + print("All pods are running.") + return {"success": True} + except Exception as e: + print(f"Error during evaluation: {str(e)}") + return {"success": False} + + + + def evaluate(self) -> dict: + ns = self.namespace + name = "basic" + results = {} + evaluatePods = self.evaluatePods() + print(f"Pod readiness: {evaluatePods}") + + cr = json.loads(self.kubectl.exec_command( + f"kubectl get tidbcluster {name} -n tidb-cluster -o json" + )) + desired = (cr.get("spec", {}).get("tidb", {}) or {}).get("replicas") + + sts_name = f"{name}-tidb" + try: + sts = json.loads(self.kubectl.exec_command( + f"kubectl get sts {sts_name} -n {ns} -o json" + )) + sts_replicas = (sts.get("spec", {}) or {}).get("replicas") + sts_ready = (sts.get("status", {}) or {}).get("readyReplicas") + sts_current = (sts.get("status", {}) or {}).get("replicas") + except Exception: + sts = {} + sts_replicas = sts_ready = sts_current = None + + try: + pods = json.loads(self.kubectl.exec_command( + f"kubectl get pods -n {ns} " + f"-l app.kubernetes.io/instance={name},app.kubernetes.io/component=tidb -o json" + )) + pod_count = len(pods.get("items", [])) + except Exception: + pod_count = None + + fault_applied = (desired == 100000) + print("== Evaluation Result ===") + print(f"CR desired replicas: {desired}") + print(f"StatefulSet replicas: {sts_replicas}") + print(f"StatefulSet current replicas: {sts_current}") + print(f"StatefulSet ready replicas: {sts_ready}") + print(f"TiDB pod count: {pod_count}") + print(f"Fault applied: {fault_applied}") + results["success"] = not fault_applied + + return results + + + + + \ No newline at end of file diff --git a/sregym/conductor/oracles/operator_misoperation/security_context_mitigation.py b/sregym/conductor/oracles/operator_misoperation/security_context_mitigation.py new file mode 100644 index 0000000..e776a53 --- /dev/null +++ b/sregym/conductor/oracles/operator_misoperation/security_context_mitigation.py @@ -0,0 +1,113 @@ +import json +import yaml +import tempfile +from sregym.conductor.oracles.base import Oracle + +class SecurityContextMitigationOracle(Oracle): + def __init__(self, problem, deployment_name: str): + super().__init__(problem) + self.deployment_name = deployment_name + self.namespace = problem.namespace + self.kubectl = problem.kubectl + + def evaluatePods(self) -> dict: + print("== Evaluating pod readiness ==") + try: + output = self.kubectl.exec_command( + f"kubectl get pods -n {self.namespace} -o yaml" + ) + pods = yaml.safe_load(output) + pods_list = pods.get("items", []) + pod_statuses = {} + for pod in pods_list: + pod_name = pod["metadata"]["name"] + container_status = pod["status"].get("containerStatuses", []) + if container_status: + state = container_status[0].get("state", {}) + if "waiting" in state: + reason = state["waiting"].get("reason", "Unknown") + pod_statuses[pod_name] = reason + elif "running" in state: + pod_statuses[pod_name] = "Running" + else: + pod_statuses[pod_name] = "Terminated" + else: + pod_statuses[pod_name] = "No Status" + + print("Pod Statuses:") + for pod, status in pod_statuses.items(): + print(f" - {pod}: {status}") + if status != "Running": + print(f"Pod {pod} is not running. 
Status: {status}") + return {"success": False} + print("All pods are running.") + return {"success": True} + except Exception as e: + print(f"Error during evaluation: {str(e)}") + return {"success": False} + + + + def evaluate(self) -> dict: + ns = self.namespace + name = "basic" + evaluatePods = self.evaluatePods() + print(f"Pod Readiness: {evaluatePods}") + + cr = json.loads(self.kubectl.exec_command( + f"kubectl get tidbcluster {name} -n tidb-cluster -o json" + )) + run_as_user = ( + cr.get("spec", {}) + .get("tidb", {}) + .get("podSecurityContext", {}) + .get("runAsUser") + ) + + sts_name = f"{name}-tidb" + sts_run_as_user = None + try: + sts = json.loads(self.kubectl.exec_command( + f"kubectl get sts {sts_name} -n {ns} -o json" + )) + sts_run_as_user = ( + sts.get("spec", {}) + .get("template", {}) + .get("spec", {}) + .get("securityContext", {}) + .get("runAsUser") + ) + except Exception: + pass + + pod_run_as_users = [] + try: + pods = json.loads(self.kubectl.exec_command( + f"kubectl get pods -n {ns} " + f"-l app.kubernetes.io/instance={name},app.kubernetes.io/component=tidb -o json" + )) + for item in pods.get("items", []): + pod_run_as_users.append( + (item.get("metadata", {}).get("name"), + (item.get("spec", {}).get("securityContext") or {}).get("runAsUser")) + ) + except Exception: + pass + print("== Evaluation Result ===") + print(f"CR runAsUser: {run_as_user}") + print(f"StatefulSet runAsUser: {sts_run_as_user}") + print(f"Pod runAsUsers: {pod_run_as_users}") + print(f"Fault applied: {run_as_user == -1}") + + + fault_present = (run_as_user == -1) + return { + "success": not fault_present, + "cr_runAsUser": run_as_user, + "sts_runAsUser": sts_run_as_user, + "pod_runAsUsers": pod_run_as_users, + "fault_applied": fault_present + } + + + \ No newline at end of file diff --git a/sregym/conductor/oracles/operator_misoperation/wrong_update_strategy_mitigation.py b/sregym/conductor/oracles/operator_misoperation/wrong_update_strategy_mitigation.py new file mode 100644 index 0000000..4a89d77 --- /dev/null +++ b/sregym/conductor/oracles/operator_misoperation/wrong_update_strategy_mitigation.py @@ -0,0 +1,87 @@ +import json +import yaml +import tempfile +from sregym.conductor.oracles.base import Oracle + +class WrongUpdateStrategyMitigationOracle(Oracle): + def __init__(self, problem, deployment_name: str): + super().__init__(problem) + self.deployment_name = deployment_name + self.namespace = problem.namespace + self.kubectl = problem.kubectl + + def evaluatePods(self) -> dict: + print("== Evaluating pod readiness ==") + try: + output = self.kubectl.exec_command( + f"kubectl get pods -n {self.namespace} -o yaml" + ) + pods = yaml.safe_load(output) + pods_list = pods.get("items", []) + pod_statuses = {} + for pod in pods_list: + pod_name = pod["metadata"]["name"] + container_status = pod["status"].get("containerStatuses", []) + if container_status: + state = container_status[0].get("state", {}) + if "waiting" in state: + reason = state["waiting"].get("reason", "Unknown") + pod_statuses[pod_name] = reason + elif "running" in state: + pod_statuses[pod_name] = "Running" + else: + pod_statuses[pod_name] = "Terminated" + else: + pod_statuses[pod_name] = "No Status" + + print("Pod Statuses:") + for pod, status in pod_statuses.items(): + print(f" - {pod}: {status}") + if status != "Running": + print(f"Pod {pod} is not running. 
Status: {status}") + return {"success": False} + print("All pods are running.") + return {"success": True} + except Exception as e: + print(f"Error during evaluation: {str(e)}") + return {"success": False} + + + + def evaluate(self) -> dict: + ns = self.namespace + name = "basic" + + cr = json.loads(self.kubectl.exec_command( + f"kubectl get tidbcluster {name} -n tidb-cluster -o json" + )) + cr_strategy = ( + cr.get("spec", {}) + .get("tidb", {}) + .get("statefulSetUpdateStrategy") + ) + + sts_name = f"{name}-tidb" + sts_type = None + try: + sts = json.loads(self.kubectl.exec_command( + f"kubectl get sts {sts_name} -n {ns} -o json" + )) + sts_type = ( + sts.get("spec", {}) + .get("updateStrategy", {}) + .get("type") + ) + except Exception: + pass + + BAD = "SomeStrategyForUpdate" + fault_applied = (cr_strategy == BAD) + print(f"cr_strategy: {cr_strategy}, sts_type: {sts_type}, fault_applied: {fault_applied}") + + return { + "success": not fault_applied, + "cr_statefulSetUpdateStrategy": cr_strategy, + "sts_updateStrategy_type": sts_type, + "fault_applied": fault_applied + } \ No newline at end of file diff --git a/sregym/conductor/oracles/rolling_update_misconfiguration_mitigation.py b/sregym/conductor/oracles/rolling_update_misconfiguration_mitigation.py new file mode 100644 index 0000000..d323716 --- /dev/null +++ b/sregym/conductor/oracles/rolling_update_misconfiguration_mitigation.py @@ -0,0 +1,57 @@ +import json +import yaml +import tempfile +from sregym.conductor.oracles.base import Oracle + +class RollingUpdateMitigationOracle(Oracle): + def __init__(self, problem, deployment_name: str): + super().__init__(problem) + self.deployment_name = deployment_name + self.namespace = problem.namespace + self.kubectl = problem.kubectl + + def evaluate(self) -> dict: + print("== Rolling Update Mitigation Evaluation ==") + + try: + output = self.kubectl.exec_command( + f"kubectl get deployment {self.deployment_name} -n {self.namespace} -o yaml" + ) + deployment = yaml.safe_load(output) + + new_init = [ {"name": "hang-init", "image": "busybox", "command": ["/bin/sh", "-c", "sleep 15"]}] + + deployment["spec"]["template"]["spec"]["initContainers"] = new_init + with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as tmp: + yaml.dump(deployment, tmp) + tmp_path = tmp.name + patch_cmd = ( + f"kubectl patch deployment {self.deployment_name} -n {self.namespace} --patch-file {tmp_path}" + ) + patch_out = self.kubectl.exec_command(patch_cmd) + print(f"Patched initContainers: {patch_out}") + + self.kubectl.wait_for_ready(self.namespace) + + print("🔄 Triggering test rollout…") + self.kubectl.exec_command( + f"kubectl rollout restart deployment {self.deployment_name} -n {self.namespace}" + ) + + deploy_json = self.kubectl.exec_command( + f"kubectl get deployment {self.deployment_name}" + f" -n {self.namespace} -o json" + ) + deploy = json.loads(deploy_json) + avail = deploy["status"].get("availableReplicas", 0) + + if avail > 0: + print("✅ Mitigation successful: deployment reports availableReplicas >", avail) + return {"success": True} + else: + print("❌ Mitigation failed: No pods available") + return {"success": False} + + except Exception as e: + print(f"❌ Error during evaluation: {str(e)}") + return {"success": False} diff --git a/sregym/conductor/oracles/rpc_retry_storm_mitigation.py b/sregym/conductor/oracles/rpc_retry_storm_mitigation.py new file mode 100644 index 0000000..2a112c2 --- /dev/null +++ b/sregym/conductor/oracles/rpc_retry_storm_mitigation.py @@ -0,0 +1,180 @@ +from 
sregym.conductor.oracles.base import Oracle + +import pandas as pd +import numpy as np +import io + +class RPCRetryStormMitigationOracle(Oracle): + importance = 1.0 + + def analyze_latency_trend(self, csv_file_path, threshold_ratio=2.0): + try: + # Read CSV file + df = pd.read_csv(io.StringIO(csv_file_path)) + + # Ensure correct column names + if 'Second' not in df.columns or 'AverageLatency(ns)' not in df.columns: + print(f"Error: CSV file is missing required columns") + return False + + # Convert data types + df['Second'] = pd.to_numeric(df['Second'], errors='coerce') + df['AverageLatency(ns)'] = pd.to_numeric(df['AverageLatency(ns)'], errors='coerce') + + # Remove invalid data + df = df.dropna() + + # Extract 5-55 seconds data + early_period = df[(df['Second'] >= 5) & (df['Second'] <= 55)] + + # Extract 95-119 seconds data + late_period = df[(df['Second'] >= 95) & (df['Second'] <= 119)] + + # Check if data is sufficient + if len(early_period) < 30 or len(late_period) < 15: + print(f"Warning: Insufficient data points, early: {len(early_period)}, late: {len(late_period)}") + return False + + # Get latency data + early_latency = early_period['AverageLatency(ns)'].values + late_latency = late_period['AverageLatency(ns)'].values + + # Calculate statistics + early_mean = np.mean(early_latency) + late_mean = np.mean(late_latency) + early_std = np.std(early_latency) + late_std = np.std(late_latency) + + print(f"5-55 seconds: Mean = {early_mean:.2f}, Standard Deviation = {early_std:.2f}") + print(f"95-119 seconds: Mean = {late_mean:.2f}, Standard Deviation = {late_std:.2f}") + + # Standard 1: Mean Comparison + # If the late mean is significantly higher than the early mean (exceeds the threshold ratio), it is deemed inconsistent + mean_ratio = late_mean / early_mean if early_mean > 0 else float('inf') + print(f"Mean Ratio: {mean_ratio:.2f}") + + if mean_ratio > threshold_ratio: + print(f"Mean difference is too large (Ratio: {mean_ratio:.2f} > {threshold_ratio})") + return False + + # Standard 2: Coefficient of Variation Comparison + # Coefficient of Variation = Standard Deviation / Mean, measures relative variability + early_cv = early_std / early_mean if early_mean > 0 else 0 + late_cv = late_std / late_mean if late_mean > 0 else 0 + + print(f"Coefficient of Variation - Early: {early_cv:.4f}, Late: {late_cv:.4f}") + + # If the late coefficient of variation is significantly greater than the early, it is deemed unstable + if late_cv > early_cv * 5: + print(f"Late variation is too large (Late: {late_cv:.4f} > Early: {early_cv * 5:.4f})") + return False + + # Standard 3: Magnitude Comparison + # Check for significant jumps in magnitude (e.g., from milliseconds to seconds) + early_median = np.median(early_latency) + late_median = np.median(late_latency) + + if late_median > early_median * 100: # Define a significant jump in magnitude + print(f"Significant jump in magnitude detected: Late median is {late_median/early_median:.1f} times the Early median") + return False + + # Standard 4: Outlier Detection + # Check for a large number of high outliers in the late period + early_q95 = np.percentile(early_latency, 95) + late_outliers = np.sum(late_latency > early_q95 * 10) # Define a high outlier as > 10x early Q95 + late_outlier_ratio = late_outliers / len(late_latency) + + print(f"Late outlier ratio: {late_outlier_ratio:.2%}") + + if late_outlier_ratio > 0.3: # If more than 30% of points are outliers + print(f"Late outlier ratio is too high") + return False + + # Standard 5: Persistent Degradation 
Detection + # Check if most of the late period latency is significantly higher than the early period + persistent_degradation_threshold = 3.0 # This multiplier can be adjusted + persistent_degradation_ratio = 0.8 # 80% of late data points need to meet the condition + + # Calculate the threshold for early latency (e.g., 95th percentile) + early_threshold = np.percentile(early_latency, 95) + + # Check how many late data points exceed the early threshold by the specified multiplier + degraded_points = np.sum(late_latency > early_threshold * persistent_degradation_threshold) + degraded_ratio = degraded_points / len(late_latency) + + print(f"Persistent degradation ratio: {degraded_ratio:.2%} (Points exceeding early 95th percentile by {persistent_degradation_threshold} times)") + + if degraded_ratio > persistent_degradation_ratio: + print(f"Persistent degradation detected: {degraded_ratio:.2%} > {persistent_degradation_ratio:.2%}") + return False + + # Optional: Stricter check - use early mean as baseline + early_mean_threshold = early_mean * persistent_degradation_threshold + points_above_mean_threshold = np.sum(late_latency > early_mean_threshold) + ratio_above_mean = points_above_mean_threshold / len(late_latency) + + print(f"Late points exceeding early mean by {persistent_degradation_threshold} times: {ratio_above_mean:.2%}") + + if ratio_above_mean > persistent_degradation_ratio: + print(f"Persistent degradation detected: {ratio_above_mean:.2%} > {persistent_degradation_ratio:.2%}") + return False + + print("Latency trend is consistent") + return True + + except Exception as e: + print(f"An error occurred during analysis: {e}") + return False + + def run_workload(self, problem, kubectl, namespace='default'): + problem.start_workload() + job_name = problem.wrk.job_name + kubectl.wait_for_job_completion(job_name=job_name, namespace=namespace, timeout=1200) + workentries = problem.wrk.retrievelog() + workentry = workentries[0] if workentries else None + print(f"Workload Entry: {workentry}") + return workentry + + def evaluate(self) -> dict: + print("== Mitigation Evaluation ==") + kubectl = self.problem.kubectl + namespace = self.problem.namespace + + results = {} + workentry = self.run_workload(problem=self.problem, kubectl=kubectl) + + not_metastable = self.analyze_latency_trend(workentry.log) if workentry else False + + if not_metastable: + # Check if all services (not only faulty service) is back to normal (Running) + pod_list = kubectl.list_pods(namespace) + for pod in pod_list.items: + if pod.status.container_statuses: + # Check container statuses + for container_status in pod.status.container_statuses: + if ( + container_status.state.waiting + and container_status.state.waiting.reason == "CrashLoopBackOff" + ): + print(f"Container {container_status.name} is in CrashLoopBackOff") + not_metastable = False + elif ( + container_status.state.terminated + and container_status.state.terminated.reason != "Completed" + ): + print( + f"Container {container_status.name} is terminated with reason: {container_status.state.terminated.reason}" + ) + not_metastable = False + elif not container_status.ready: + print(f"Container {container_status.name} is not ready") + not_metastable = False + + if not not_metastable: + break + + results["success"] = not_metastable + + print(f"Mitigation Result: {'Pass ✅' if not_metastable else 'Fail ❌'}") + + return results diff --git a/sregym/conductor/oracles/scale_pod_zero_mitigation.py b/sregym/conductor/oracles/scale_pod_zero_mitigation.py new file mode 100644 
index 0000000..35d0f7b --- /dev/null +++ b/sregym/conductor/oracles/scale_pod_zero_mitigation.py @@ -0,0 +1,55 @@ +from sregym.conductor.oracles.base import Oracle + + +class ScalePodZeroMitigationOracle(Oracle): + importance = 1.0 + + def evaluate(self) -> dict: + print("== Mitigation Evaluation ==") + + kubectl = self.problem.kubectl + namespace = self.problem.namespace + faulty_service = self.problem.faulty_service + results = {} + + all_normal = True + + deployment = kubectl.get_deployment(faulty_service, namespace) + + if deployment is None: + print(f"Deployment for {faulty_service} not found") + all_normal = False + else: + desired_replicas = deployment.spec.replicas + available_replicas = deployment.status.available_replicas + + if desired_replicas != 1 or available_replicas != 1: + print( + f"Faulty service {faulty_service} has {available_replicas} available replicas out of {desired_replicas} desired" + ) + all_normal = False + + # Check if all services are running normally + pod_list = kubectl.list_pods(namespace) + for pod in pod_list.items: + for container_status in pod.status.container_statuses: + if container_status.state.waiting and container_status.state.waiting.reason == "CrashLoopBackOff": + print(f"Container {container_status.name} is in CrashLoopBackOff") + all_normal = False + elif container_status.state.terminated and container_status.state.terminated.reason != "Completed": + print( + f"Container {container_status.name} is terminated with reason: {container_status.state.terminated.reason}" + ) + all_normal = False + elif not container_status.ready: + print(f"Container {container_status.name} is not ready") + all_normal = False + + if not all_normal: + break + + results["success"] = all_normal + + print(f"Mitigation Result: {'Pass ✅' if all_normal else 'Fail ❌'}") + + return results diff --git a/sregym/conductor/oracles/service_endpoint_mitigation.py b/sregym/conductor/oracles/service_endpoint_mitigation.py new file mode 100644 index 0000000..249a5f1 --- /dev/null +++ b/sregym/conductor/oracles/service_endpoint_mitigation.py @@ -0,0 +1,32 @@ +from sregym.conductor.oracles.base import Oracle + + +class ServiceEndpointMitigationOracle(Oracle): + """ + Oracle that verifies every Service has at least one ready endpoint. + """ + + importance = 1.0 + + def evaluate(self) -> dict: + print("== Service Endpoints Evaluation ==") + + kubectl = self.problem.kubectl + namespace = self.problem.namespace + + # Always verify every Service in the namespace. 
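+        # Rather than checking only the faulty service, every Service in the
+        # namespace must expose at least one Endpoints subset with ready
+        # addresses; an empty Endpoints object or an API error while reading
+        # it fails the oracle.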
+ services_to_check = [svc.metadata.name for svc in kubectl.list_services(namespace).items] + + for svc_name in services_to_check: + try: + ep = kubectl.core_v1_api.read_namespaced_endpoints(svc_name, namespace) + has_ready = any(subset.addresses for subset in (ep.subsets or [])) + if not has_ready: + print(f"❌ Service {svc_name} has NO ready endpoints") + return {"success": False} + except Exception as e: + print(f"❌ Error retrieving endpoints for service {svc_name}: {e}") + return {"success": False} + + print("[✅] All checked services have ready endpoints.") + return {"success": True} diff --git a/sregym/conductor/oracles/sustained_readiness.py b/sregym/conductor/oracles/sustained_readiness.py new file mode 100644 index 0000000..7fb53a2 --- /dev/null +++ b/sregym/conductor/oracles/sustained_readiness.py @@ -0,0 +1,88 @@ +import time +from sregym.conductor.oracles.base import Oracle + + +class SustainedReadinessOracle(Oracle): + + importance = 1.0 + + def __init__(self, problem, buffer_period=10, sustained_period=60, check_interval=0.5): + super().__init__(problem) + self.buffer_period = buffer_period + self.sustained_period = sustained_period + self.check_interval = check_interval + + def evaluate(self) -> dict: + print("== Sustained Readiness Evaluation ==") + + kubectl = self.problem.kubectl + namespace = self.problem.namespace + + print(f"⏳ Waiting up to {self.buffer_period}s for all pods to become ready...") + start_time = time.time() + all_ready_start = None + + while time.time() - start_time < self.buffer_period: + if self._check_all_pods_ready(kubectl, namespace): + all_ready_start = time.time() + print(f"✅ All pods ready after {all_ready_start - start_time:.1f}s") + break + time.sleep(self.check_interval) + + if all_ready_start is None: + print(f"❌ All the pods did not become ready within {self.buffer_period}s buffer period") + return {"success": False} + + + print(f"⏱️ Monitoring pods for {self.sustained_period}s sustained readiness...") + monitoring_start = time.time() + + while time.time() - monitoring_start < self.sustained_period: + elapsed = time.time() - monitoring_start + + if not self._check_all_pods_ready(kubectl, namespace, verbose=True): + print(f"❌ Pod readiness check failed after {elapsed:.1f}s of monitoring") + return {"success": False} + + if int(elapsed) % 10 == 0 and elapsed > 0: + print(f"🚧 Pods still ready after {int(elapsed)}s...") + + time.sleep(self.check_interval) + + print(f"✅ All pods remained ready for the full {self.sustained_period}s period!") + return {"success": True} + + def _check_all_pods_ready(self, kubectl, namespace, verbose=False) -> bool: + try: + pod_list = kubectl.list_pods(namespace) + all_normal = True + + for pod in pod_list.items: + if pod.status.phase != "Running": + if verbose: + print(f"⚠️ Pod {pod.metadata.name} is in phase: {pod.status.phase}") + all_normal = False + break + + for container_status in pod.status.container_statuses: + if container_status.state.waiting and container_status.state.waiting.reason: + if verbose: + print(f"⚠️ Container {container_status.name} is waiting: {container_status.state.waiting.reason}") + all_normal = False + + elif container_status.state.terminated and container_status.state.terminated.reason != "Completed": + if verbose: + print(f"⚠️ Container {container_status.name} terminated: {container_status.state.terminated.reason}") + all_normal = False + + elif not container_status.ready: + if verbose: + print(f"⚠️ Container {container_status.name} is not ready") + all_normal = False + + return 
all_normal + + except Exception as e: + if verbose: + print(f"⚠️ Error checking pod readiness: {e}") + return False \ No newline at end of file diff --git a/sregym/conductor/oracles/target_port_mitigation.py b/sregym/conductor/oracles/target_port_mitigation.py new file mode 100644 index 0000000..7d73fc9 --- /dev/null +++ b/sregym/conductor/oracles/target_port_mitigation.py @@ -0,0 +1,53 @@ +from sregym.conductor.oracles.base import Oracle +from sregym.conductor.oracles.utils import is_exact_match + + +class TargetPortMisconfigMitigationOracle(Oracle): + importance = 1.0 + + def evaluate(self) -> dict: + print("== Mitigation Evaluation ==") + + kubectl = self.problem.kubectl + namespace = self.problem.namespace + faulty_service = self.problem.faulty_service + results = {} + + # Check if target port has been reset to 9090 + configs = kubectl.get_service_json(faulty_service, namespace) + target_port = configs["spec"]["ports"][0]["targetPort"] + all_normal = is_exact_match(target_port, 9090) + + if all_normal: + # Check if all services (not only faulty service) is back to normal (Running) + pod_list = kubectl.list_pods(namespace) + for pod in pod_list.items: + if pod.status.container_statuses: + # Check container statuses + for container_status in pod.status.container_statuses: + if ( + container_status.state.waiting + and container_status.state.waiting.reason == "CrashLoopBackOff" + ): + print(f"Container {container_status.name} is in CrashLoopBackOff") + all_normal = False + elif ( + container_status.state.terminated + and container_status.state.terminated.reason != "Completed" + ): + print( + f"Container {container_status.name} is terminated with reason: {container_status.state.terminated.reason}" + ) + all_normal = False + elif not container_status.ready: + print(f"Container {container_status.name} is not ready") + all_normal = False + + if not all_normal: + break + + results["success"] = all_normal + + print(f"Mitigation Result: {'Pass ✅' if all_normal else 'Fail ❌'}") + + return results diff --git a/sregym/conductor/oracles/utils.py b/sregym/conductor/oracles/utils.py new file mode 100644 index 0000000..e0e4bbf --- /dev/null +++ b/sregym/conductor/oracles/utils.py @@ -0,0 +1,11 @@ +"""Helper functions for evaluation""" + + +def is_exact_match(pred: int | str | list, target: int | str | list) -> bool: + """Return True if the prediction is an exact match to the target.""" + return pred == target + + +def is_subset(pred: list, target: list) -> bool: + """Return True if the prediction is a subset of the target.""" + return set(pred).issubset(set(target)) diff --git a/sregym/conductor/oracles/valkey_auth_mitigation.py b/sregym/conductor/oracles/valkey_auth_mitigation.py new file mode 100644 index 0000000..cb87b22 --- /dev/null +++ b/sregym/conductor/oracles/valkey_auth_mitigation.py @@ -0,0 +1,44 @@ +from sregym.conductor.oracles.base import Oracle + + +class ValkeyAuthMitigation(Oracle): + importance = 1.0 + + def evaluate(self) -> dict: + print("== Mitigation Evaluation ==") + results = {"success": False} + + kubectl = self.problem.kubectl + namespace = self.problem.namespace + + # Find the valkey pod + valkey_pods = [ + pod.metadata.name for pod in kubectl.list_pods(namespace).items if "valkey-cart" in pod.metadata.name + ] + + if not valkey_pods: + print("❌ No Valkey pod found.") + return results + + valkey_pod = valkey_pods[0] + print(f"🔍 Found Valkey pod: {valkey_pod}") + + # Check the current password setting in Valkey + try: + command = f"kubectl exec -n {namespace} {valkey_pod} -- 
valkey-cli CONFIG GET requirepass" + output = kubectl.exec_command(command) + + if "requirepass" in output: + parts = output.strip().splitlines() + if len(parts) == 2 and parts[1] == "": + results["success"] = True + print("✅ Valkey password is cleared. Authentication restored.") + else: + print(f"❌ Valkey password still set: {parts[1]}") + else: + print(f"❌ Unexpected valkey-cli CONFIG GET output: {output}") + except Exception as e: + print(f"❌ Error querying Valkey password: {e}") + + print(f"Mitigation Result: {'Pass ✅' if results['success'] else 'Fail ❌'}") + return results diff --git a/sregym/conductor/oracles/workload.py b/sregym/conductor/oracles/workload.py new file mode 100644 index 0000000..27bdde0 --- /dev/null +++ b/sregym/conductor/oracles/workload.py @@ -0,0 +1,37 @@ +from sregym.conductor.oracles.base import Oracle + + +def truncate(text: str, length: int = 100) -> str: + """Truncate text to a specified length, adding ellipsis if truncated.""" + if len(text) > length: + text = text[:length] + "..." + text = text.encode("unicode_escape").decode("utf-8") + return text + + +class WorkloadOracle(Oracle): + importance = 3.0 + + def __init__(self, problem, wrk_manager=None): + super().__init__(problem) + self.wrk = wrk_manager + + def evaluate(self) -> dict: + try: + self.wrk.collect(number=1) + entries = self.wrk.collect(number=50) + for entry in entries: + if not entry.ok: + print(f"[❌] Workload entry at {entry.time} failed with log: {truncate(entry.log, 100)}") + return { + "success": False, + } + print(f"[✅] Successfully collected {len(entries)} workload entries.") + return { + "success": True, + } + except Exception as e: + print(f"[❌] Error during workload collection: {e}") + return { + "success": False, + } diff --git a/sregym/conductor/oracles/wrong_bin_mitigation.py b/sregym/conductor/oracles/wrong_bin_mitigation.py new file mode 100644 index 0000000..b9fbebd --- /dev/null +++ b/sregym/conductor/oracles/wrong_bin_mitigation.py @@ -0,0 +1,37 @@ +from sregym.conductor.oracles.base import Oracle + + +class WrongBinMitigationOracle(Oracle): + importance = 1.0 + + def evaluate(self) -> dict: + print("== Evaluation ==") + + kubectl = self.problem.kubectl + namespace = self.problem.namespace + results = {} + + # Check if the deployment was updated to use the right binary + expected_command = ( + "profile" # Command dictates which binary will be ran, we want to run /go/bin/profile and not /go/bin/geo + ) + + try: + deployment = kubectl.get_deployment(self.problem.faulty_service, namespace) + containers = deployment.spec.template.spec.containers + + for container in containers: + command = container.command or [] + if expected_command not in command: + print(f"[❌] Deployment for container '{container.name}' is using wrong binary: {command}") + results["success"] = False + return results + + print("[✅] Deployment is using the correct binary.") + results["success"] = True + return results + + except Exception as e: + print(f"[ERROR] Exception during evaluation: {e}") + results["success"] = False + return results diff --git a/sregym/conductor/parser.py b/sregym/conductor/parser.py new file mode 100644 index 0000000..c01ce6b --- /dev/null +++ b/sregym/conductor/parser.py @@ -0,0 +1,182 @@ +"""Parser for various APIs that an Agent may invoke.""" + +import ast +import re +import logging + +class ResponseParsingError(Exception): + def __init__(self, message): + super().__init__(f"Error parsing response: {message}") + self.message = message + + +class ResponseParser: + def __init__(self): 
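+        # Two loggers are used: "sregym-global" for parse errors surfaced at
+        # the run level, and "all.sregym.conductor" for debug traces of the
+        # extracted code block, context, API name, and parsed arguments.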
+ self.logger = logging.getLogger("sregym-global") + self.local_logger = logging.getLogger("all.sregym.conductor") + + def parse(self, response: str) -> dict: + """Parses the response string to extract the API name and arguments. + + Args: + response (str): The response string (typically an agent's response). + + Returns: + dict: The parsed API name and arguments. + """ + code_block = self.extract_codeblock(response) + self.local_logger.debug(f"Extracted code block from submit: {code_block}") + context = self.extract_context(response) + self.local_logger.debug(f"Extracted context from submit: {context}") + api_name = self.parse_api_name(code_block) + self.local_logger.debug(f"Parsed API name from submit: {api_name}") + args, kwargs = self.parse_args(code_block, is_shell_command=api_name == "exec_shell") + self.local_logger.debug(f"Parsed args and kwargs from submit: {args}, {kwargs}") + return { + "api_name": api_name, + "args": args, + "kwargs": kwargs, + "context": context, + } + + def extract_codeblock(self, response: str) -> str: + """Extract a markdown code block from a string. + + Args: + response (str): The response string. + + Returns: + str: The extracted code block. + """ + outputlines = response.split("\n") + indexlines = [i for i, line in enumerate(outputlines) if "```" in line] + if len(indexlines) < 2: + return "" + return "\n".join(outputlines[indexlines[0] + 1 : indexlines[1]]) + + def extract_context(self, response: str) -> list: + """Extract context outside of a code block. + + Args: + response (str): The response string. + + Returns: + list: The extracted context. + """ + pattern = r"(?:```[\s\S]*?```)|(.*?)(?:(?=```)|$)" + matches = re.findall(pattern, response, re.DOTALL) + context = [match.strip() for match in matches if match.strip()] + + return context + + def parse_api_name(self, response: str) -> str: + """Parses the API name from the response function call. + + >>> parse_api_name("get_logs()") + "get_logs" + + Args: + response (str): The response string. + + Returns: + str: The API name. + """ + + first_parenthesis = response.find("(") + if first_parenthesis != -1: + return response[:first_parenthesis].strip() + return "" + + def parse_args(self, response: str, is_shell_command=False) -> tuple[list, dict]: + """Parses the arguments of a function call. + + >>> parse_args("get_logs(10, 'error')") + [10, 'error'] + + Args: + response (str): The response string. + + Returns: + args (list): A list of positional arguments. + kwargs (dict): A dictionary of keyword arguments. 
+ """ + first_parenthesis = response.find("(") + last_parenthesis = response.rfind(")") + + if first_parenthesis != -1 and last_parenthesis != -1: + args_str = response[first_parenthesis + 1 : last_parenthesis].strip() + + # case: no arguments + if not args_str: + return [], {} + + # case: single quoted string argument + if is_shell_command: + # remove keyword + if args_str.startswith("command="): + args_str = args_str.replace("command=", "").strip() + + if args_str.startswith('"') and args_str.endswith('"'): + arg_str = args_str.strip('"') + elif args_str.startswith("'") and args_str.endswith("'"): + arg_str = args_str.strip("'") + else: + raise ResponseParsingError("Error when parsing response: commands must be quoted strings") + + arg_str = arg_str.replace('\\"', '"').replace("\\'", "'") + return [arg_str], {} + + # case: positional/kwargs handled w/ ast.parse + try: + parsed = ast.parse(f"func({args_str})") + call = parsed.body[0].value + args, kwargs = [], {} + + for arg in call.args: + if isinstance(arg, ast.Constant): + args.append(arg.value) + elif isinstance(arg, (ast.List, ast.Tuple)): + args.append([self.eval_ast_node(elt) for elt in arg.elts]) + elif isinstance(arg, ast.Dict): + args.append( + { + self.eval_ast_node(key): self.eval_ast_node(value) + for key, value in zip(arg.keys, arg.values) + } + ) + else: + args.append(self.eval_ast_node(arg)) + + for kwarg in call.keywords: + kwargs[kwarg.arg] = self.eval_ast_node(kwarg.value) + + return args, kwargs + except Exception as e: + self.logger.info(f"[ERROR] Error parsing response: {str(e)}") + + if args_str: + self.local_logger.error(f"Error parsing response: {str(e)} content to be parsed: func({args_str})") + else: + self.local_logger.error(f"Error parsing response: {str(e)} content to be parsed: func()") + + raise ResponseParsingError(f"Error parsing response: {str(e)}") + + self.local_logger.error("No API call found!") + raise ResponseParsingError("No API call found!") + + def eval_ast_node(self, node): + """Evaluates an AST node to its Python value.""" + if isinstance(node, ast.Constant): + return node.value + elif isinstance(node, ast.List): + return [self.eval_ast_node(elt) for elt in node.elts] + elif isinstance(node, ast.Dict): + return {self.eval_ast_node(key): self.eval_ast_node(value) for key, value in zip(node.keys, node.values)} + elif isinstance(node, ast.Name): + if node.id == "True": + return True + elif node.id == "False": + return False + elif node.id == "None": + return None + raise ValueError(f"Unsupported AST node type: {type(node)}") diff --git a/sregym/conductor/problems/__init__.py b/sregym/conductor/problems/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/sregym/conductor/problems/ad_service_failure.py b/sregym/conductor/problems/ad_service_failure.py new file mode 100644 index 0000000..81fc65b --- /dev/null +++ b/sregym/conductor/problems/ad_service_failure.py @@ -0,0 +1,32 @@ +"""Otel demo adServiceFailure feature flag fault.""" + +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_otel import OtelFaultInjector +from sregym.service.apps.astronomy_shop import AstronomyShop +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class AdServiceFailure(Problem): + def __init__(self): + self.app = AstronomyShop() + self.kubectl = KubeCtl() + self.namespace = self.app.namespace + self.injector = 
OtelFaultInjector(namespace=self.namespace) + self.faulty_service = "ad" + self.root_cause = f"The `{self.faulty_service}` service has a feature flag enabled that causes it to fail, resulting in service unavailability." + super().__init__(app=self.app, namespace=self.app.namespace) + # === Attach evaluation oracles === + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection ==") + self.injector.inject_fault("adFailure") + print(f"Fault: adServiceFailure | Namespace: {self.namespace}\n") + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + self.injector.recover_fault("adFailure") diff --git a/sregym/conductor/problems/ad_service_high_cpu.py b/sregym/conductor/problems/ad_service_high_cpu.py new file mode 100644 index 0000000..070ab3c --- /dev/null +++ b/sregym/conductor/problems/ad_service_high_cpu.py @@ -0,0 +1,33 @@ +"""Otel demo adServiceHighCpu feature flag fault.""" + +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.oracles.mitigation import MitigationOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_otel import OtelFaultInjector +from sregym.service.apps.astronomy_shop import AstronomyShop +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class AdServiceHighCpu(Problem): + def __init__(self): + self.app = AstronomyShop() + self.kubectl = KubeCtl() + self.namespace = self.app.namespace + self.injector = OtelFaultInjector(namespace=self.namespace) + self.faulty_service = "ad" + self.root_cause = f"The `{self.faulty_service}` service has a feature flag enabled that causes high CPU usage, resulting in performance degradation." + super().__init__(app=self.app, namespace=self.app.namespace) + # === Attach evaluation oracles === + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection ==") + self.injector.inject_fault("adHighCpu") + print(f"Fault: AdServiceHighCpu | Namespace: {self.namespace}\n") + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + self.injector.recover_fault("adHighCpu") diff --git a/sregym/conductor/problems/ad_service_manual_gc.py b/sregym/conductor/problems/ad_service_manual_gc.py new file mode 100644 index 0000000..eb4c63a --- /dev/null +++ b/sregym/conductor/problems/ad_service_manual_gc.py @@ -0,0 +1,32 @@ +"""Otel demo adServiceManualGc feature flag fault.""" + +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_otel import OtelFaultInjector +from sregym.service.apps.astronomy_shop import AstronomyShop +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class AdServiceManualGc(Problem): + def __init__(self): + self.app = AstronomyShop() + self.kubectl = KubeCtl() + self.namespace = self.app.namespace + self.injector = OtelFaultInjector(namespace=self.namespace) + self.faulty_service = "ad" + self.root_cause = f"The `{self.faulty_service}` service has a feature flag enabled that triggers manual garbage collection, causing performance degradation and potential service interruptions." 
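+        # The fault is the OTel demo's `adManualGc` feature flag;
+        # inject_fault/recover_fault below toggle it via the OtelFaultInjector.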
+ super().__init__(app=self.app, namespace=self.app.namespace) + # === Attach evaluation oracles === + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection ==") + self.injector.inject_fault("adManualGc") + print(f"Fault: adServiceManualGc | Namespace: {self.namespace}\n") + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + self.injector.recover_fault("adManualGc") diff --git a/sregym/conductor/problems/assign_non_existent_node.py b/sregym/conductor/problems/assign_non_existent_node.py new file mode 100644 index 0000000..442560d --- /dev/null +++ b/sregym/conductor/problems/assign_non_existent_node.py @@ -0,0 +1,47 @@ +"""Assign pods to non existent node problem for the SocialNetwork application.""" + +import time + +from sregym.conductor.oracles.assign_non_existent_node_mitigation import AssignNonExistentNodeMitigationOracle +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_virtual import VirtualizationFaultInjector +from sregym.service.apps.social_network import SocialNetwork +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class AssignNonExistentNode(Problem): + def __init__(self): + self.app = SocialNetwork() + self.kubectl = KubeCtl() + self.namespace = self.app.namespace + self.faulty_service = "user-service" + self.root_cause = f"The deployment `{self.faulty_service}` is configured with a nodeSelector pointing to a non-existent node (extra-node), causing pods to remain in Pending state." + super().__init__(app=self.app, namespace=self.app.namespace) + # === Attach evaluation oracles === + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + + self.app.create_workload() + self.mitigation_oracle = AssignNonExistentNodeMitigationOracle(problem=self) + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection ==") + injector = VirtualizationFaultInjector(namespace=self.namespace) + injector._inject( + fault_type="assign_to_non_existent_node", + microservices=[self.faulty_service], + ) + time.sleep(25) + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}\n") + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + injector = VirtualizationFaultInjector(namespace=self.namespace) + injector._recover( + fault_type="assign_to_non_existent_node", + microservices=[self.faulty_service], + ) + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}\n") diff --git a/sregym/conductor/problems/auth_miss_mongodb.py b/sregym/conductor/problems/auth_miss_mongodb.py new file mode 100644 index 0000000..64da916 --- /dev/null +++ b/sregym/conductor/problems/auth_miss_mongodb.py @@ -0,0 +1,44 @@ +"""MongoDB authentication missing problem in the SocialNetwork application.""" + +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.oracles.mitigation import MitigationOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_virtual import VirtualizationFaultInjector +from sregym.service.apps.social_network import SocialNetwork +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class MongoDBAuthMissing(Problem): + def 
__init__(self): + self.app = SocialNetwork() + self.kubectl = KubeCtl() + self.namespace = self.app.namespace + self.faulty_service = "url-shorten-mongodb" + self.root_cause = f"The MongoDB service `{self.faulty_service}` is configured to require TLS authentication, but the certificates are not properly configured, causing connection failures." + super().__init__(app=self.app, namespace=self.app.namespace) + # === Attach evaluation oracles === + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + + self.app.create_workload() + self.mitigation_oracle = MitigationOracle(problem=self) + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection ==") + injector = VirtualizationFaultInjector(namespace=self.namespace) + injector._inject( + fault_type="auth_miss_mongodb", + microservices=[self.faulty_service], + ) + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}\n") + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + injector = VirtualizationFaultInjector(namespace=self.namespace) + injector._recover( + fault_type="auth_miss_mongodb", + microservices=[self.faulty_service], + ) + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}\n") diff --git a/sregym/conductor/problems/base.py b/sregym/conductor/problems/base.py new file mode 100644 index 0000000..ecdb646 --- /dev/null +++ b/sregym/conductor/problems/base.py @@ -0,0 +1,28 @@ +"""Problem base class""" + +from abc import ABC, abstractmethod + + +class Problem(ABC): + def __init__(self, app, namespace: str): + self.app = app + self.namespace = namespace + self.fault_injected = False + self.results = {} + self.root_cause = None # root cause of the problem in natural language + + # Optional: attach oracles in subclass + self.diagnosis_oracle = None + self.mitigation_oracle = None + + def requires_khaos(self) -> bool: + """Override this method to return True if the problem requires Khaos for fault injection.""" + return False + + @abstractmethod + def inject_fault(self): + pass + + @abstractmethod + def recover_fault(self): + pass diff --git a/sregym/conductor/problems/capacity_decrease_rpc_retry_storm.py b/sregym/conductor/problems/capacity_decrease_rpc_retry_storm.py new file mode 100644 index 0000000..a757ff3 --- /dev/null +++ b/sregym/conductor/problems/capacity_decrease_rpc_retry_storm.py @@ -0,0 +1,55 @@ +from sregym.conductor.oracles.detection import DetectionOracle +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.oracles.rpc_retry_storm_mitigation import RPCRetryStormMitigationOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_virtual import VirtualizationFaultInjector +from sregym.generators.workload.blueprint_hotel_work import BHotelWrk, BHotelWrkWorkloadManager +from sregym.service.apps.blueprint_hotel_reservation import BlueprintHotelReservation +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class CapacityDecreaseRPCRetryStorm(Problem): + def __init__(self): + self.app = BlueprintHotelReservation() + self.kubectl = KubeCtl() + self.namespace = self.app.namespace + self.faulty_service = "rpc" + self.root_cause = f"The ConfigMap `{self.faulty_service}` has misconfigured RPC timeout (50ms) and retry settings (30 retries), causing an RPC retry storm that overwhelms the service. It is a metastable failure." 
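+        # Mitigation is judged by replaying the hotel-reservation workload and
+        # checking that request latency stays stable across the run (see
+        # RPCRetryStormMitigationOracle.analyze_latency_trend); the oracle also
+        # requires all pods to be healthy afterwards.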
+ super().__init__(app=self.app, namespace=self.app.namespace) + # === Attach evaluation oracles === + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + + self.mitigation_oracle = RPCRetryStormMitigationOracle(problem=self) + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection ==") + injector = VirtualizationFaultInjector(namespace=self.namespace) + injector.inject_rpc_timeout_retries_misconfiguration(configmap=self.faulty_service) + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}\n") + self.mitigation_oracle.run_workload(problem=self, kubectl=self.kubectl) + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + injector = VirtualizationFaultInjector(namespace=self.namespace) + injector.recover_rpc_timeout_retries_misconfiguration(configmap=self.faulty_service) + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}\n") + + def create_workload(self, tput: int = None, duration: str = None, multiplier: int = None): + if tput is None: + tput = 3000 + if duration is None: + duration = "500s" + if multiplier is None: + multiplier = 1 + self.wrk = BHotelWrkWorkloadManager( + wrk=BHotelWrk(tput=tput, duration=duration, multiplier=multiplier), + CPU_containment=True, + ) + + def start_workload(self): + if not hasattr(self, "wrk"): + self.create_workload() + self.wrk.start() diff --git a/sregym/conductor/problems/cart_service_failure.py b/sregym/conductor/problems/cart_service_failure.py new file mode 100644 index 0000000..b6b3fe3 --- /dev/null +++ b/sregym/conductor/problems/cart_service_failure.py @@ -0,0 +1,32 @@ +"""Otel demo cartServiceFailure feature flag fault.""" + +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_otel import OtelFaultInjector +from sregym.service.apps.astronomy_shop import AstronomyShop +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class CartServiceFailure(Problem): + def __init__(self): + self.app = AstronomyShop() + self.kubectl = KubeCtl() + self.namespace = self.app.namespace + self.injector = OtelFaultInjector(namespace=self.namespace) + self.faulty_service = "cart" + self.root_cause = f"The `{self.faulty_service}` service has a feature flag enabled that causes it to fail, resulting in service unavailability." 
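+        # Only a diagnosis oracle is attached below; the LLM judge scores the
+        # agent's reported root cause against `self.root_cause`.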
+ super().__init__(app=self.app, namespace=self.app.namespace) + # === Attach evaluation oracles === + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection ==") + self.injector.inject_fault("cartFailure") + print(f"Fault: cartServiceFailure | Namespace: {self.namespace}\n") + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + self.injector.recover_fault("cartFailure") diff --git a/sregym/conductor/problems/configmap_drift.py b/sregym/conductor/problems/configmap_drift.py new file mode 100644 index 0000000..31d6a16 --- /dev/null +++ b/sregym/conductor/problems/configmap_drift.py @@ -0,0 +1,69 @@ +"""ConfigMap drift problem - removes critical keys from mounted ConfigMap.""" + +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.oracles.missing_cm_key_mitigation import MissingCmKeyMitigationOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_virtual import VirtualizationFaultInjector +from sregym.service.apps.hotel_reservation import HotelReservation +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class ConfigMapDrift(Problem): + def __init__(self, faulty_service: str = "geo"): + self.faulty_service = faulty_service + + self.app = HotelReservation() + + super().__init__(app=self.app, namespace=self.app.namespace) + + self.kubectl = KubeCtl() + self.root_cause = f"The ConfigMap `{self.faulty_service}-config` is missing critical configuration keys (e.g., GeoMongoAddress), causing the deployment `{self.faulty_service}` to malfunction." + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + self.configmap_name = f"{self.faulty_service}-config" + + self.app.create_workload() + self.mitigation_oracle = MissingCmKeyMitigationOracle( + problem=self, + configmap_name=self.configmap_name, + expected_keys=[ + "consulAddress", + "jaegerAddress", + "FrontendPort", + "GeoPort", + "GeoMongoAddress", + "ProfilePort", + "ProfileMongoAddress", + "ProfileMemcAddress", + "RatePort", + "RateMongoAddress", + "RateMemcAddress", + "RecommendPort", + "RecommendMongoAddress", + "ReservePort", + "ReserveMongoAddress", + "ReserveMemcAddress", + "SearchPort", + "UserPort", + "UserMongoAddress", + "KnativeDomainName", + ], + ) + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection ==") + + injector = VirtualizationFaultInjector(namespace=self.namespace) + injector.inject_configmap_drift(microservices=[self.faulty_service]) + + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}\n") + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + + injector = VirtualizationFaultInjector(namespace=self.namespace) + injector.recover_configmap_drift(microservices=[self.faulty_service]) + + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}\n") diff --git a/sregym/conductor/problems/duplicate_pvc_mounts.py b/sregym/conductor/problems/duplicate_pvc_mounts.py new file mode 100644 index 0000000..5ab5f83 --- /dev/null +++ b/sregym/conductor/problems/duplicate_pvc_mounts.py @@ -0,0 +1,55 @@ +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.oracles.mitigation import MitigationOracle +from sregym.conductor.oracles.workload import WorkloadOracle +from 
sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_virtual import VirtualizationFaultInjector +from sregym.service.apps.astronomy_shop import AstronomyShop +from sregym.service.apps.hotel_reservation import HotelReservation +from sregym.service.apps.social_network import SocialNetwork +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class DuplicatePVCMounts(Problem): + + def __init__(self, app_name: str = "hotel_reservation", faulty_service: str = "mongodb-rate"): + self.app_name = app_name + self.faulty_service = faulty_service + + if self.app_name == "hotel_reservation": + self.app = HotelReservation() + elif self.app_name == "social_network": + self.app = SocialNetwork() + elif self.app_name == "astronomy_shop": + self.app = AstronomyShop() + else: + raise ValueError(f"Unsupported app name: {app_name}") + + super().__init__(app=self.app, namespace=self.app.namespace) + self.kubectl = KubeCtl() + self.namespace = self.app.namespace + self.root_cause = f"Multiple replicas of the deployment `{self.faulty_service}` are configured to share a single ReadWriteOnce PVC, causing mount conflicts and preventing pods from starting." + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + self.mitigation_oracle = MitigationOracle(problem=self) + + self.app.create_workload() + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection ==") + injector = VirtualizationFaultInjector(namespace=self.namespace) + injector._inject( + fault_type="duplicate_pvc_mounts", + microservices=[self.faulty_service], + ) + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}\n") + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + injector = VirtualizationFaultInjector(namespace=self.namespace) + injector._recover( + fault_type="duplicate_pvc_mounts", + microservices=[self.faulty_service], + ) + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}\n") diff --git a/sregym/conductor/problems/env_variable_shadowing.py b/sregym/conductor/problems/env_variable_shadowing.py new file mode 100644 index 0000000..9dba61e --- /dev/null +++ b/sregym/conductor/problems/env_variable_shadowing.py @@ -0,0 +1,45 @@ +from sregym.conductor.oracles.compound import CompoundedOracle +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.oracles.mitigation import MitigationOracle +from sregym.conductor.oracles.workload import WorkloadOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_virtual import VirtualizationFaultInjector +from sregym.service.apps.astronomy_shop import AstronomyShop +from sregym.service.apps.hotel_reservation import HotelReservation +from sregym.service.apps.social_network import SocialNetwork +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class EnvVariableShadowing(Problem): + def __init__(self, app_name: str = "astronomy_shop", faulty_service: str = "frontend-proxy"): + self.faulty_service = faulty_service + self.app_name = app_name + + if self.app_name == "astronomy_shop": + self.app = AstronomyShop() + else: + raise ValueError(f"Unsupported application: {self.app_name}") + + super().__init__(app=self.app, namespace=self.app.namespace) + + self.kubectl = KubeCtl() + self.root_cause = f"The deployment `{self.faulty_service}` has environment variables (e.g., 
FRONTEND_HOST) that shadow expected values, causing incorrect service configuration."
+        self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause)
+
+        self.app.create_workload()
+        self.mitigation_oracle = MitigationOracle(problem=self)
+
+    @mark_fault_injected
+    def inject_fault(self):
+        print("== Fault Injection ==")
+        injector = VirtualizationFaultInjector(namespace=self.namespace)
+        injector.inject_env_variable_shadowing(microservices=[self.faulty_service])
+        print(f"Service: {self.faulty_service} | Namespace: {self.namespace}")
+
+    @mark_fault_injected
+    def recover_fault(self):
+        print("== Fault Recovery ==")
+        injector = VirtualizationFaultInjector(namespace=self.namespace)
+        injector.recover_env_variable_shadowing(microservices=[self.faulty_service])
+        print(f"Service: {self.faulty_service} | Namespace: {self.namespace}")
diff --git a/sregym/conductor/problems/faulty_image_correlated.py b/sregym/conductor/problems/faulty_image_correlated.py
new file mode 100644
index 0000000..26f33d0
--- /dev/null
+++ b/sregym/conductor/problems/faulty_image_correlated.py
+from sregym.conductor.oracles.incorrect_image_mitigation import IncorrectImageMitigationOracle
+from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle
+from sregym.conductor.problems.base import Problem
+from sregym.generators.fault.inject_app import ApplicationFaultInjector
+from sregym.service.apps.astronomy_shop import AstronomyShop
+from sregym.service.apps.hotel_reservation import HotelReservation
+from sregym.service.kubectl import KubeCtl
+from sregym.utils.decorators import mark_fault_injected
+
+
+class FaultyImageCorrelated(Problem):
+    def __init__(self):
+        self.app = HotelReservation()
+        self.kubectl = KubeCtl()
+        self.namespace = self.app.namespace
+        self.faulty_service = ["frontend", "geo", "profile", "rate", "recommendation", "reservation", "user", "search"]
+        self.injector = ApplicationFaultInjector(namespace=self.namespace)
+        super().__init__(app=self.app, namespace=self.namespace)
+        self.root_cause = "The deployments `frontend`, `geo`, `profile`, `rate`, `recommendation`, `reservation`, `user`, and `search` are configured to use a faulty image 'jackcuii/hotel-reservation:latest'."
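+        # All eight deployments are pointed at the same bad image, so the failures are correlated across the whole application rather than isolated to a single service.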
+ + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + # not really the incorrect image problem, just reuse the incorrect image function + self.mitigation_oracle = IncorrectImageMitigationOracle( + problem=self, + actual_images={service: "jackcuii/hotel-reservation:latest" for service in self.faulty_service}, + ) + + self.app.create_workload() + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection ==") + # not really the incorrect image problem, just reuse the incorrect image function + for service in self.faulty_service: + self.injector.inject_incorrect_image( + deployment_name=service, namespace=self.namespace, bad_image="jackcuii/hotel-reservation:latest" + ) + print(f"Service: {service} | Namespace: {self.namespace}\n") + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + for service in self.faulty_service: + self.injector.recover_incorrect_image( + deployment_name=service, + namespace=self.namespace, + correct_image="yinfangchen/hotel-reservation:latest", + ) diff --git a/sregym/conductor/problems/gc_capacity_degradation.py b/sregym/conductor/problems/gc_capacity_degradation.py new file mode 100644 index 0000000..d1019ec --- /dev/null +++ b/sregym/conductor/problems/gc_capacity_degradation.py @@ -0,0 +1,60 @@ +from sregym.conductor.oracles.detection import DetectionOracle +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_virtual import VirtualizationFaultInjector +from sregym.generators.workload.blueprint_hotel_work import BHotelWrk, BHotelWrkWorkloadManager +from sregym.service.apps.blueprint_hotel_reservation import BlueprintHotelReservation +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class GCCapacityDegradation(Problem): + def __init__(self): + self.app = BlueprintHotelReservation() + self.kubectl = KubeCtl() + self.namespace = self.app.namespace + self.faulty_service = "garbage collection" + self.root_cause = "All deployments have the GOGC environment variable set to 10 (instead of the default 100), causing aggressive garbage collection that degrades service capacity and performance. This is a metastable failure." 
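+        # GOGC=10 makes the Go runtime start a collection after roughly 10% heap growth instead of the default 100%, so services spend far more CPU on garbage collection than on serving requests.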
+ super().__init__(app=self.app, namespace=self.app.namespace) + # === Attach evaluation oracles === + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection ==") + injector = VirtualizationFaultInjector(namespace=self.namespace) + injector.inject_gogc_env_variable_patch(gogc_value="10") + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}\n") + self.run_workload() + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + injector = VirtualizationFaultInjector(namespace=self.namespace) + injector.recover_gogc_env_variable_patch() + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}\n") + + def create_workload(self, tput: int = None, duration: str = None, multiplier: int = None): + if tput is None: + tput = 2000 + if duration is None: + duration = "500s" + if multiplier is None: + multiplier = 6 + self.wrk = BHotelWrkWorkloadManager( + wrk=BHotelWrk(tput=tput, duration=duration, multiplier=multiplier), + ) + + def start_workload(self): + if not hasattr(self, "wrk"): + self.create_workload() + self.wrk.start() + + def run_workload(self, namespace="default"): + self.start_workload() + job_name = self.wrk.job_name + self.kubectl.wait_for_job_completion(job_name=job_name, namespace=namespace, timeout=1000) + workentries = self.wrk.retrievelog() + workentry = workentries[0] if workentries else None + print(f"Workload Entry: {workentry}") + return workentry diff --git a/sregym/conductor/problems/image_slow_load.py b/sregym/conductor/problems/image_slow_load.py new file mode 100644 index 0000000..0ef667d --- /dev/null +++ b/sregym/conductor/problems/image_slow_load.py @@ -0,0 +1,33 @@ +"""Otel demo imageSlowLoad feature flag fault.""" + +from sregym.conductor.oracles.detection import DetectionOracle +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_otel import OtelFaultInjector +from sregym.service.apps.astronomy_shop import AstronomyShop +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class ImageSlowLoad(Problem): + def __init__(self): + self.app = AstronomyShop() + self.kubectl = KubeCtl() + self.namespace = self.app.namespace + self.injector = OtelFaultInjector(namespace=self.namespace) + self.faulty_service = "frontend" + self.root_cause = f"The `{self.faulty_service}` service has a feature flag enabled that causes slow image loading, resulting in degraded user experience and performance issues." 
+ super().__init__(app=self.app, namespace=self.app.namespace) + # === Attach evaluation oracles === + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection ==") + self.injector.inject_fault("imageSlowLoad") + print(f"Fault: imageSlowLoad | Namespace: {self.namespace}\n") + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + self.injector.recover_fault("imageSlowLoad") diff --git a/sregym/conductor/problems/incorrect_image.py b/sregym/conductor/problems/incorrect_image.py new file mode 100644 index 0000000..517b9e9 --- /dev/null +++ b/sregym/conductor/problems/incorrect_image.py @@ -0,0 +1,46 @@ +from sregym.conductor.oracles.incorrect_image_mitigation import IncorrectImageMitigationOracle +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_app import ApplicationFaultInjector +from sregym.service.apps.astronomy_shop import AstronomyShop +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class IncorrectImage(Problem): + def __init__(self): + self.app = AstronomyShop() + self.kubectl = KubeCtl() + self.namespace = self.app.namespace + self.faulty_service = ["product-catalog"] + self.injector = ApplicationFaultInjector(namespace=self.namespace) + self.root_cause = ( + "The 'product-catalog' deployment is mis-configured to pull the non-existent image 'app-image:latest'." + ) + super().__init__(app=self.app, namespace=self.namespace) + + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + self.mitigation_oracle = IncorrectImageMitigationOracle( + problem=self, actual_images={"product-catalog": "app-image:latest"} + ) + + self.app.create_workload() + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection ==") + for service in self.faulty_service: + self.injector.inject_incorrect_image( + deployment_name=service, namespace=self.namespace, bad_image="app-image:latest" + ) + print(f"Service: {service} | Namespace: {self.namespace}\n") + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + for service in self.faulty_service: + self.injector.recover_incorrect_image( + deployment_name=service, + namespace=self.namespace, + correct_image="ghcr.io/open-telemetry/demo:2.0.2-productcatalogservice", + ) diff --git a/sregym/conductor/problems/incorrect_port_assignment.py b/sregym/conductor/problems/incorrect_port_assignment.py new file mode 100644 index 0000000..9d32d30 --- /dev/null +++ b/sregym/conductor/problems/incorrect_port_assignment.py @@ -0,0 +1,44 @@ +from sregym.conductor.oracles.incorrect_port import IncorrectPortAssignmentMitigationOracle +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_app import ApplicationFaultInjector +from sregym.service.apps.astronomy_shop import AstronomyShop +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class IncorrectPortAssignment(Problem): + def __init__(self): + self.app = AstronomyShop() + self.kubectl = KubeCtl() + self.namespace = self.app.namespace + self.faulty_service = "checkout" + self.env_var = "PRODUCT_CATALOG_ADDR" + self.incorrect_port = "8082" + self.correct_port 
= "8080" + self.injector = ApplicationFaultInjector(namespace=self.namespace) + self.root_cause = f"The deployment `{self.faulty_service}` has the environment variable `{self.env_var}` configured with an incorrect port `{self.incorrect_port}` instead of `{self.correct_port}`." + super().__init__(app=self.app, namespace=self.namespace) + # === Attach evaluation oracles === + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + self.mitigation_oracle = IncorrectPortAssignmentMitigationOracle(problem=self) + + self.app.create_workload() + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection ==") + self.injector.inject_incorrect_port_assignment( + deployment_name=self.faulty_service, + component_label=self.faulty_service, + env_var=self.env_var, + incorrect_port=self.incorrect_port, + ) + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}\n") + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + self.injector.recover_incorrect_port_assignment( + deployment_name="checkout", env_var=self.env_var, correct_port="8080" + ) diff --git a/sregym/conductor/problems/ingress_misroute.py b/sregym/conductor/problems/ingress_misroute.py new file mode 100644 index 0000000..9463f5d --- /dev/null +++ b/sregym/conductor/problems/ingress_misroute.py @@ -0,0 +1,77 @@ +from kubernetes import client + +from sregym.conductor.oracles.ingress_misroute_oracle import IngressMisrouteMitigationOracle +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.problems.base import Problem +from sregym.service.apps.hotel_reservation import HotelReservation +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class IngressMisroute(Problem): + def __init__(self, path="/api", correct_service="frontend-service", wrong_service="recommendation-service"): + self.app = HotelReservation() + self.kubectl = KubeCtl() + self.path = path + self.correct_service = correct_service + self.wrong_service = wrong_service + self.ingress_name = "hotel-reservation-ingress" + self.root_cause = f"The ingress `{self.ingress_name}` has a misconfigured routing rule for path `{self.path}`, routing traffic to the wrong service (`{self.wrong_service}` instead of `{self.correct_service}`)." 
+ super().__init__(app=self.app, namespace=self.app.namespace) + + self.networking_v1 = client.NetworkingV1Api() + + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + self.mitigation_oracle = IngressMisrouteMitigationOracle(problem=self) + + @mark_fault_injected + def inject_fault(self): + """Misroute /api to wrong backend""" + + try: + ingress = self.networking_v1.read_namespaced_ingress(name=self.ingress_name, namespace=self.namespace) + except client.exceptions.ApiException as e: + if e.status == 404: + ingress_manifest = { + "apiVersion": "networking.k8s.io/v1", + "kind": "Ingress", + "metadata": {"name": self.ingress_name, "namespace": self.namespace}, + "spec": { + "rules": [ + { + "http": { + "paths": [ + { + "path": self.path, + "pathType": "Prefix", + "backend": { + "service": {"name": self.correct_service, "port": {"number": 80}} + }, + } + ] + } + } + ] + }, + } + self.networking_v1.create_namespaced_ingress(namespace=self.namespace, body=ingress_manifest) + ingress = self.networking_v1.read_namespaced_ingress(name=self.ingress_name, namespace=self.namespace) + else: + raise + + # Modify the rule for /api to wrong_service + for rule in ingress.spec.rules: + for path in rule.http.paths: + if path.path == self.path: + path.backend.service.name = self.wrong_service + self.networking_v1.replace_namespaced_ingress(name=self.ingress_name, namespace=self.namespace, body=ingress) + + @mark_fault_injected + def recover_fault(self): + """Revert misroute to correct backend""" + ingress = self.networking_v1.read_namespaced_ingress(name=self.ingress_name, namespace=self.namespace) + for rule in ingress.spec.rules: + for path in rule.http.paths: + if path.path == self.path: + path.backend.service.name = self.correct_service + self.networking_v1.replace_namespaced_ingress(name=self.ingress_name, namespace=self.namespace, body=ingress) diff --git a/sregym/conductor/problems/kafka_queue_problems.py b/sregym/conductor/problems/kafka_queue_problems.py new file mode 100644 index 0000000..b122c1d --- /dev/null +++ b/sregym/conductor/problems/kafka_queue_problems.py @@ -0,0 +1,32 @@ +"""Otel demo kafkaQueueProblems feature flag fault.""" + +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_otel import OtelFaultInjector +from sregym.service.apps.astronomy_shop import AstronomyShop +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class KafkaQueueProblems(Problem): + def __init__(self): + self.app = AstronomyShop() + self.kubectl = KubeCtl() + self.namespace = self.app.namespace + self.injector = OtelFaultInjector(namespace=self.namespace) + self.faulty_service = "kafka" + self.root_cause = f"The `{self.faulty_service}` service has a feature flag enabled that causes queue problems, resulting in message processing failures." 
+ super().__init__(app=self.app, namespace=self.app.namespace) + # === Attach evaluation oracles === + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection ==") + self.injector.inject_fault("kafkaQueueProblems") + print(f"Fault: kafkaQueueProblems | Namespace: {self.namespace}\n") + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + self.injector.recover_fault("kafkaQueueProblems") diff --git a/sregym/conductor/problems/kubelet_crash.py b/sregym/conductor/problems/kubelet_crash.py new file mode 100644 index 0000000..cac7c70 --- /dev/null +++ b/sregym/conductor/problems/kubelet_crash.py @@ -0,0 +1,42 @@ +import time + +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_remote_os import RemoteOSFaultInjector +from sregym.service.apps.astronomy_shop import AstronomyShop +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class KubeletCrash(Problem): + def __init__(self): + self.app = AstronomyShop() + self.kubectl = KubeCtl() + self.namespace = self.app.namespace + self.rollout_services = ["frontend", "frontend-proxy", "currency"] + self.injector = RemoteOSFaultInjector() + + super().__init__(app=self.app, namespace=self.app.namespace) + self.root_cause = "The kubelet daemon on a node has crashed, preventing pod scheduling, updates, and management on that node, causing services to become unavailable or stuck." + + # note from JC after talking to Bohan: + # We could consider adding an oracle later, but it's not trivial where diagnosis should go + # Same with mitigation, this is done with a script to kill the kubelet daemon. + # Maybe we could implement an oracle later to check for the status of the kubelet daemon? 
+ self.app.create_workload() + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection ==") + self.injector.inject_kubelet_crash() + # rollout the services to trigger the failure + for service in self.rollout_services: + print(f"Rolling out {service}...") + self.kubectl.trigger_rollout(deployment_name=service, namespace=self.namespace) + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + self.injector.recover_kubelet_crash() + for service in self.rollout_services: + print(f"Rolling out {service}...") + self.kubectl.trigger_rollout(deployment_name=service, namespace=self.namespace) diff --git a/sregym/conductor/problems/latent_sector_error.py b/sregym/conductor/problems/latent_sector_error.py new file mode 100644 index 0000000..c6cc6d7 --- /dev/null +++ b/sregym/conductor/problems/latent_sector_error.py @@ -0,0 +1,312 @@ +from enum import StrEnum +from typing import Dict, Optional, Tuple + +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_kernel import KernelInjector +from sregym.service.apps.hotel_reservation import HotelReservation +from sregym.service.dm_dust_manager import DM_DUST_DEVICE_NAME +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + +# Constants +DEFAULT_TARGET_DEPLOY = "mongodb-geo" +DEFAULT_NAMESPACE = "hotel-reservation" +DEFAULT_BAD_BLOCK_STEP = 1000 +TEST_BAD_BLOCKS = [100, 200, 300] + + +class LatentSectorErrorStrategy(StrEnum): + """Strategy for injecting bad blocks in dm-dust device.""" + + TEST = "test" + EVERY_1000 = "every_1000" # Also test strategy + TARGETED = "targeted" + + +class LatentSectorError(Problem): + """ + Simulates latent sector errors (LSE) on a MongoDB PVC + (geo, profile, reservation, etc.) using dm-dust inside Khaos. + """ + + def __init__( + self, + target_deploy: str = DEFAULT_TARGET_DEPLOY, + namespace: str = DEFAULT_NAMESPACE, + strategy: LatentSectorErrorStrategy = LatentSectorErrorStrategy.TARGETED, + ): + self.app = HotelReservation() + self.kubectl = KubeCtl() + self.namespace = namespace + self.deploy = target_deploy + self.injector = KernelInjector(self.kubectl) + self.target_node: Optional[str] = None + self.pvc_path: Optional[str] = None + self.strategy = strategy + + super().__init__(app=self.app, namespace=self.app.namespace) + + self.root_cause = "There's a latent sector error on the hard drive that the mongodb-geo service's data is on." + + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + + self.app.create_workload() + + def requires_khaos(self) -> bool: + """This problem requires Khaos for dm-dust infrastructure setup.""" + return True + + def _get_kubectl_json(self, command: str) -> dict: + """Execute kubectl command and parse JSON output.""" + out = self.kubectl.exec_command(command) + if not out: + raise RuntimeError(f"Command returned empty output: {command}") + import json + + return json.loads(out) + + def _discover_node_for_deploy(self) -> Optional[str]: + """Return the node where the target deployment is running.""" + # First try with a label selector (common OpenEBS hotel-reservation pattern) + svc = self.deploy.split("-", 1)[-1] # e.g. 
"geo" + cmd = f"kubectl -n {self.namespace} get pods -l app=mongodb,component={svc} -o json" + try: + data = self._get_kubectl_json(cmd) + for item in data.get("items", []): + if item.get("status", {}).get("phase") == "Running": + return item["spec"]["nodeName"] + except (KeyError, RuntimeError): + pass + + # Fallback: search by pod name prefix + cmd = f"kubectl -n {self.namespace} get pods -o json" + try: + data = self._get_kubectl_json(cmd) + for item in data.get("items", []): + name = item["metadata"]["name"] + if name.startswith(self.deploy) and item.get("status", {}).get("phase") == "Running": + return item["spec"]["nodeName"] + except (KeyError, RuntimeError): + pass + + return None + + def _discover_pvc(self) -> Tuple[str, str, str]: + """ + Discover PVC information for the target deployment. + + Returns: + Tuple of (pvc_name, pv_name, local_path) + """ + cmd = f"kubectl -n {self.namespace} get pvc -o json" + data = self._get_kubectl_json(cmd) + + pvc_name, pv_name = None, None + deploy_component = self.deploy.split("-")[-1] # e.g. "geo" + + for item in data.get("items", []): + claim = item["metadata"]["name"] + if deploy_component in claim: # match geo, profile, etc. + pvc_name = claim + pv_name = item["spec"]["volumeName"] + break + + if not pvc_name: + raise RuntimeError(f"Could not find PVC for deployment {self.deploy}") + + cmd = f"kubectl get pv {pv_name} -o json" + pv = self._get_kubectl_json(cmd) + + try: + local_path = pv["spec"]["local"]["path"] + except KeyError: + raise RuntimeError(f"PV {pv_name} does not have a local path (not a local PV)") + + return pvc_name, pv_name, local_path + + def _get_openebs_storage_size(self, node: str) -> Dict[str, int]: + """ + Get storage information for the OpenEBS dm-dust device. + + Returns: + Dictionary with sectors, size_bytes, size_mb, size_gb, block_size + """ + script = f""" +set -e +DM_NAME={DM_DUST_DEVICE_NAME} +if [ -e /dev/mapper/$DM_NAME ]; then + SECTORS=$(blockdev --getsz /dev/mapper/$DM_NAME) + SIZE_BYTES=$(blockdev --getsize64 /dev/mapper/$DM_NAME) + SIZE_MB=$((SIZE_BYTES / 1024 / 1024)) + SIZE_GB=$((SIZE_BYTES / 1024 / 1024 / 1024)) + BLOCK_SIZE=$(blockdev --getbsz /dev/mapper/$DM_NAME) + echo "$SECTORS,$SIZE_BYTES,$SIZE_MB,$SIZE_GB,$BLOCK_SIZE" +else + echo "0,0,0,0,0" +fi +""" + + result = self.injector._exec_on_node(node, script).strip() + try: + sectors, size_bytes, size_mb, size_gb, block_size = result.split(",") + return { + "sectors": int(sectors), + "size_bytes": int(size_bytes), + "size_mb": int(size_mb), + "size_gb": int(size_gb), + "block_size": int(block_size), + } + except (ValueError, IndexError) as e: + raise RuntimeError(f"Failed to parse storage size output: {result}") from e + + def _get_target_file_blocks(self, node: str) -> list[int]: + """ + Identify physical blocks used by MongoDB data files (.wt) on the target node. + Returns a list of block numbers (in 512b sectors) to corrupt. + """ + # Find mount point of the dm-dust device + cmd = f"lsblk -o MOUNTPOINT -n /dev/mapper/{DM_DUST_DEVICE_NAME}" + mount_point = self.injector._exec_on_node(node, cmd).strip() + + if not mount_point: + print(f"[MongoDBLSE] Warning: {DM_DUST_DEVICE_NAME} is not mounted. 
Cannot find target files.") + return [] + + print(f"[MongoDBLSE] Found mount point: {mount_point}") + + # Script to find blocks + script = f""" + set -e + FILES=$(find {mount_point} -name "*.wt") + BAD_BLOCKS="" + for FILE in $FILES; do + BS=$(stat -f -c %S "$FILE") + # Get start of each extent + # filefrag -v output format: + # ext: logical_offset: physical_offset: length: ... + # 0: 0.. 0: 34048.. 34048: 1: + OFFSETS=$(filefrag -v "$FILE" | awk '/^[ ]*[0-9]+:/ {{print $4}}' | cut -d. -f1) + for OFF in $OFFSETS; do + START_SECTOR=$((OFF * BS / 512)) + # Corrupt 10 sectors (5KB) at the start of each extent to ensure we hit it + for I in $(seq 0 9); do + BAD_BLOCKS="$BAD_BLOCKS $((START_SECTOR + I))" + done + done + done + echo $BAD_BLOCKS + """ + + result = self.injector._exec_on_node(node, script).strip() + try: + return [int(b) for b in result.split()] + except ValueError: + return [] + + def _inject_badblocks_by_strategy(self, node: str, storage_info: Dict[str, int]) -> None: + """Inject bad blocks according to the configured strategy.""" + if self.strategy == LatentSectorErrorStrategy.EVERY_1000: + start_sector = 0 + end_sector = storage_info["sectors"] + step = DEFAULT_BAD_BLOCK_STEP + + self.injector.dm_dust_add_badblocks_range( + node, DM_DUST_DEVICE_NAME, start=start_sector, end=end_sector, step=step + ) + + elif self.strategy == LatentSectorErrorStrategy.TEST: + self.injector.dm_dust_add_badblocks(node, DM_DUST_DEVICE_NAME, TEST_BAD_BLOCKS) + + elif self.strategy == LatentSectorErrorStrategy.TARGETED: + print(f"[MongoDBLSE] Strategy TARGETED: Identifying MongoDB data blocks...") + blocks = self._get_target_file_blocks(node) + if not blocks: + print(f"[MongoDBLSE] Warning: No target blocks found. Falling back to TEST strategy.") + self.injector.dm_dust_add_badblocks(node, DM_DUST_DEVICE_NAME, TEST_BAD_BLOCKS) + else: + print(f"[MongoDBLSE] Injecting {len(blocks)} bad blocks targeting data files.") + # Inject in chunks to avoid command line length limits + chunk_size = 1000 + for i in range(0, len(blocks), chunk_size): + chunk = blocks[i : i + chunk_size] + self.injector.dm_dust_add_badblocks(node, DM_DUST_DEVICE_NAME, chunk) + + else: + raise ValueError(f"Unknown strategy: {self.strategy}") + + @mark_fault_injected + def inject_fault(self): + """Inject latent sector errors using dm-dust bad blocks.""" + print(f"[MongoDBLSE] Starting latent sector error injection for {self.deploy}") + + # Get target node where the deployment is running + self.target_node = self._discover_node_for_deploy() + if not self.target_node: + raise RuntimeError(f"Could not find running node for deployment {self.deploy}") + + print(f"[MongoDBLSE] Target node: {self.target_node}") + + # Since dm-dust infrastructure is already set up by Conductor, + # we just need to add bad blocks and enable them + + # Clear any existing bad blocks from previous runs + print(f"[MongoDBLSE] Clearing existing bad blocks...") + self.injector.dm_dust_clear(self.target_node, DM_DUST_DEVICE_NAME) + + # Ensure we start in bypass mode + print(f"[MongoDBLSE] Setting device to bypass mode...") + self.injector.dm_dust_disable(self.target_node, DM_DUST_DEVICE_NAME) + + # Apply strategy-based bad blocks injection + storage_info = self._get_openebs_storage_size(self.target_node) + if storage_info["sectors"] == 0: + raise RuntimeError( + f"OpenEBS dm-dust device not found or has 0 sectors on node {self.target_node}. " + f"Ensure dm-dust infrastructure is set up." 
+ ) + + self._inject_badblocks_by_strategy(self.target_node, storage_info) + + print(f"[MongoDBLSE] Enabling bad block simulation (fail_read_on_bad_block mode)") + self.injector.dm_dust_enable(self.target_node, DM_DUST_DEVICE_NAME) + + # Drop caches to force disk reads + print(f"[MongoDBLSE] Dropping caches to force disk reads...") + self.injector.drop_caches(self.target_node) + + print(f"[MongoDBLSE] Latent sector error injection complete") + + def _restart_mongodb_pod(self) -> None: + """Restart the MongoDB deployment to recover from CrashLoopBackOff.""" + print(f"[MongoDBLSE] Restarting MongoDB deployment {self.deploy}...") + cmd = f"kubectl -n {self.namespace} rollout restart deployment {self.deploy}" + self.kubectl.exec_command(cmd) + print(f"[MongoDBLSE] ✅ Deployment restart initiated") + + @mark_fault_injected + def recover_fault(self): + """Recover from latent sector error injection by clearing bad blocks.""" + print(f"[MongoDBLSE] Starting recovery from latent sector error injection") + + if not self.target_node: + print(f"[MongoDBLSE] No target node found, skipping recovery") + return + + print(f"[MongoDBLSE] Disabling bad block simulation on {self.target_node}") + self.injector.dm_dust_disable(self.target_node, DM_DUST_DEVICE_NAME) + + print(f"[MongoDBLSE] Clearing all bad blocks...") + self.injector.dm_dust_clear(self.target_node, DM_DUST_DEVICE_NAME) + + # Verify cleanup + blocks = self.injector.dm_dust_list(self.target_node, DM_DUST_DEVICE_NAME) + if blocks != "No blocks in badblocklist": + print(f"[MongoDBLSE] Warning: Bad blocks still present: {blocks}") + else: + print(f"[MongoDBLSE] ✅ All bad blocks cleared") + + # Restart MongoDB pod to recover instantly + self._restart_mongodb_pod() + + print(f"[MongoDBLSE] Recovery complete") diff --git a/sregym/conductor/problems/liveness_probe_misconfiguration.py b/sregym/conductor/problems/liveness_probe_misconfiguration.py new file mode 100644 index 0000000..a1f9502 --- /dev/null +++ b/sregym/conductor/problems/liveness_probe_misconfiguration.py @@ -0,0 +1,58 @@ +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.oracles.mitigation import MitigationOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_virtual import VirtualizationFaultInjector +from sregym.paths import TARGET_MICROSERVICES +from sregym.service.apps.astronomy_shop import AstronomyShop +from sregym.service.apps.hotel_reservation import HotelReservation +from sregym.service.apps.social_network import SocialNetwork +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class LivenessProbeMisconfiguration(Problem): + def __init__(self, app_name="social_network", faulty_service="user-service"): + self.app_name = app_name + self.faulty_service = faulty_service + + if app_name == "social_network": + self.app = SocialNetwork() + self.app.create_workload(duration=30) + + elif app_name == "hotel_reservation": + self.app = HotelReservation() + self.app.create_workload(duration=30) + + elif app_name == "astronomy_shop": + self.app = AstronomyShop() + self.app.create_workload() + + else: + raise ValueError(f"Unsupported app name: {app_name}") + + super().__init__(app=self.app, namespace=self.app.namespace) + + self.kubectl = KubeCtl() + self.injector = VirtualizationFaultInjector(namespace=self.app.namespace) + self.root_cause = f"The deployment `{self.faulty_service}` has a misconfigured liveness probe pointing to a 
non-existent health endpoint (/healthz on port 8080), causing pods to be restarted repeatedly." + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + + self.mitigation_oracle = MitigationOracle(problem=self) + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection ==") + self.injector._inject( + fault_type="liveness_probe_misconfiguration", + microservices=[self.faulty_service], + ) + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}\n") + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + self.injector._recover( + fault_type="liveness_probe_misconfiguration", + microservices=[self.faulty_service], + ) + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}\n") diff --git a/sregym/conductor/problems/liveness_probe_too_aggressive.py b/sregym/conductor/problems/liveness_probe_too_aggressive.py new file mode 100644 index 0000000..3c32e7e --- /dev/null +++ b/sregym/conductor/problems/liveness_probe_too_aggressive.py @@ -0,0 +1,46 @@ +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.oracles.sustained_readiness import SustainedReadinessOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_virtual import VirtualizationFaultInjector +from sregym.service.apps.astronomy_shop import AstronomyShop +from sregym.service.apps.hotel_reservation import HotelReservation +from sregym.service.apps.social_network import SocialNetwork +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class LivenessProbeTooAggressive(Problem): + def __init__(self, app_name: str = "social_network"): + self.app_name = app_name + self.faulty_service = "aux-service" + + if app_name == "social_network": + self.app = SocialNetwork() + elif app_name == "hotel_reservation": + self.app = HotelReservation() + elif app_name == "astronomy_shop": + self.app = AstronomyShop() + else: + raise ValueError(f"Unsupported app name: {app_name}") + + super().__init__(app=self.app, namespace=self.app.namespace) + + self.kubectl = KubeCtl() + self.injector = VirtualizationFaultInjector(namespace=self.app.namespace) + self.root_cause = f"The deployment `{self.faulty_service}` has an overly aggressive liveness probe (initialDelaySeconds=0, periodSeconds=1, failureThreshold=1) with terminationGracePeriodSeconds=0, causing pods to be killed immediately if the probe fails." 
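+        # With initialDelaySeconds=0 and failureThreshold=1, a single failed probe right after startup kills the pod, and terminationGracePeriodSeconds=0 removes any graceful shutdown window.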
+ self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + + self.app.create_workload() + self.mitigation_oracle = SustainedReadinessOracle(problem=self, sustained_period=30) + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection ==") + self.injector.inject_liveness_probe_too_aggressive([self.faulty_service]) + print(f"Service: {self.faulty_service} | Namespace: {self.app.namespace}\n") + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + self.injector.recover_liveness_probe_too_aggressive([self.faulty_service]) + print(f"Service: {self.faulty_service} | Namespace: {self.app.namespace}\n") diff --git a/sregym/conductor/problems/load_spike_rpc_retry_storm.py b/sregym/conductor/problems/load_spike_rpc_retry_storm.py new file mode 100644 index 0000000..c890710 --- /dev/null +++ b/sregym/conductor/problems/load_spike_rpc_retry_storm.py @@ -0,0 +1,54 @@ +from sregym.conductor.oracles.detection import DetectionOracle +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.oracles.rpc_retry_storm_mitigation import RPCRetryStormMitigationOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_virtual import VirtualizationFaultInjector +from sregym.generators.workload.blueprint_hotel_work import BHotelWrk, BHotelWrkWorkloadManager +from sregym.service.apps.blueprint_hotel_reservation import BlueprintHotelReservation +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class LoadSpikeRPCRetryStorm(Problem): + def __init__(self): + self.app = BlueprintHotelReservation() + self.kubectl = KubeCtl() + self.namespace = self.app.namespace + self.faulty_service = "rpc" + self.root_cause = f"The ConfigMap `{self.faulty_service}` has misconfigured RPC timeout (50ms) and retry settings (30 retries), combined with a load spike, causing an RPC retry storm that overwhelms the service." 
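+        # Same timeout/retry misconfiguration as CapacityDecreaseRPCRetryStorm, but here the storm is triggered by a traffic spike rather than by reduced capacity.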
+ super().__init__(app=self.app, namespace=self.app.namespace) + # === Attach evaluation oracles === + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + + self.mitigation_oracle = RPCRetryStormMitigationOracle(problem=self) + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection ==") + injector = VirtualizationFaultInjector(namespace=self.namespace) + injector.inject_rpc_timeout_retries_misconfiguration(configmap=self.faulty_service) + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}\n") + self.mitigation_oracle.run_workload(problem=self, kubectl=self.kubectl) + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + injector = VirtualizationFaultInjector(namespace=self.namespace) + injector.recover_rpc_timeout_retries_misconfiguration(configmap=self.faulty_service) + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}\n") + + def create_workload(self, tput: int = None, duration: str = None, multiplier: int = None): + if tput is None: + tput = 1000 + if duration is None: + duration = "600s" + if multiplier is None: + multiplier = 6 + self.wrk = BHotelWrkWorkloadManager( + wrk=BHotelWrk(tput=tput, duration=duration, multiplier=multiplier), + ) + + def start_workload(self): + if not hasattr(self, "wrk"): + self.create_workload() + self.wrk.start() diff --git a/sregym/conductor/problems/loadgenerator_flood_homepage.py b/sregym/conductor/problems/loadgenerator_flood_homepage.py new file mode 100644 index 0000000..2841727 --- /dev/null +++ b/sregym/conductor/problems/loadgenerator_flood_homepage.py @@ -0,0 +1,33 @@ +"""Otel demo loadgeneratorFloodHomepage feature flag fault.""" + +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_otel import OtelFaultInjector +from sregym.service.apps.astronomy_shop import AstronomyShop +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class LoadGeneratorFloodHomepage(Problem): + def __init__(self): + self.app = AstronomyShop() + self.kubectl = KubeCtl() + self.namespace = self.app.namespace + self.injector = OtelFaultInjector(namespace=self.namespace) + self.faulty_service = "frontend" # This fault technically gets injected into the load generator, but the loadgenerator just spams the frontend + # We can discuss more and see if we think we should change it, but loadgenerator isn't a "real" service. + self.root_cause = "The load generator has a feature flag enabled that causes it to flood the homepage with excessive requests, overwhelming the frontend service." 
+        super().__init__(app=self.app, namespace=self.app.namespace)
+        # === Attach evaluation oracles ===
+        self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause)
+
+    @mark_fault_injected
+    def inject_fault(self):
+        print("== Fault Injection ==")
+        self.injector.inject_fault("loadGeneratorFloodHomepage")
+        print(f"Fault: loadgeneratorFloodHomepage | Namespace: {self.namespace}\n")
+
+    @mark_fault_injected
+    def recover_fault(self):
+        print("== Fault Recovery ==")
+        self.injector.recover_fault("loadGeneratorFloodHomepage")
diff --git a/sregym/conductor/problems/misconfig_app.py b/sregym/conductor/problems/misconfig_app.py
new file mode 100644
index 0000000..7bdef6a
--- /dev/null
+++ b/sregym/conductor/problems/misconfig_app.py
+"""MongoDB storage user unregistered problem in the HotelReservation application."""
+
+from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle
+from sregym.conductor.oracles.mitigation import MitigationOracle
+from sregym.conductor.problems.base import Problem
+from sregym.generators.fault.inject_app import ApplicationFaultInjector
+from sregym.service.apps.hotel_reservation import HotelReservation
+from sregym.service.kubectl import KubeCtl
+from sregym.utils.decorators import mark_fault_injected
+
+
+class MisconfigAppHotelRes(Problem):
+    def __init__(self):
+        self.app = HotelReservation()
+        self.kubectl = KubeCtl()
+        self.namespace = self.app.namespace
+        self.faulty_service = "geo"
+        self.root_cause = "The 'geo' deployment is configured to use a buggy container image 'yinfangchen/geo:app3', causing the pod to keep restarting and entering the 'Error' state."
+        super().__init__(app=self.app, namespace=self.app.namespace)
+        # === Attach evaluation oracles ===
+        self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause)
+
+        self.app.create_workload()
+        self.mitigation_oracle = MitigationOracle(problem=self)
+
+    @mark_fault_injected
+    def inject_fault(self):
+        print("== Fault Injection ==")
+        injector = ApplicationFaultInjector(namespace=self.namespace)
+        injector._inject(
+            fault_type="misconfig_app",
+            microservices=[self.faulty_service],
+        )
+        print(f"Service: {self.faulty_service} | Namespace: {self.namespace}\n")
+
+    @mark_fault_injected
+    def recover_fault(self):
+        print("== Fault Recovery ==")
+        injector = ApplicationFaultInjector(namespace=self.namespace)
+        injector._recover(
+            fault_type="misconfig_app",
+            microservices=[self.faulty_service],
+        )
+        print(f"Service: {self.faulty_service} | Namespace: {self.namespace}\n")
diff --git a/sregym/conductor/problems/missing_configmap.py b/sregym/conductor/problems/missing_configmap.py
new file mode 100644
index 0000000..96f8cdf
--- /dev/null
+++ b/sregym/conductor/problems/missing_configmap.py
+from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle
+from sregym.conductor.oracles.mitigation import MitigationOracle
+from sregym.conductor.problems.base import Problem
+from sregym.generators.fault.inject_virtual import VirtualizationFaultInjector
+from sregym.service.apps.hotel_reservation import HotelReservation
+from sregym.service.apps.social_network import SocialNetwork
+from sregym.service.kubectl import KubeCtl
+from sregym.utils.decorators import mark_fault_injected
+
+
+class MissingConfigMap(Problem):
+    def __init__(self, app_name: str = "social_network", faulty_service: str = "media-mongodb"):
+        self.faulty_service = faulty_service
+        self.app_name = app_name
+
+        if self.app_name == "social_network":
+            self.app = SocialNetwork()
+        elif self.app_name ==
"hotel_reservation": + self.app = HotelReservation() + else: + raise ValueError(f"Unsupported app name: {app_name}") + + super().__init__(app=self.app, namespace=self.app.namespace) + + self.kubectl = KubeCtl() + self.root_cause = f"The ConfigMap required by the deployment `{self.faulty_service}` has been deleted, causing the pods to fail to start or malfunction." + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + + self.app.create_workload() + self.mitigation_oracle = MitigationOracle(problem=self) + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection ==") + injector = VirtualizationFaultInjector(namespace=self.namespace) + injector._inject(fault_type="missing_configmap", microservices=[self.faulty_service]) + + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}") + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + injector = VirtualizationFaultInjector(namespace=self.namespace) + injector._recover(fault_type="missing_configmap", microservices=[self.faulty_service]) + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}") diff --git a/sregym/conductor/problems/missing_env_variable.py b/sregym/conductor/problems/missing_env_variable.py new file mode 100644 index 0000000..1535628 --- /dev/null +++ b/sregym/conductor/problems/missing_env_variable.py @@ -0,0 +1,53 @@ +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.oracles.missing_env_variable_mitigation import MissingEnvVariableMitigationOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_app import ApplicationFaultInjector +from sregym.service.apps.astronomy_shop import AstronomyShop +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class MissingEnvVariable(Problem): + def __init__(self, app_name: str = "astronomy_shop", faulty_service: str = "frontend"): + self.faulty_service = faulty_service + self.app_name = app_name + + if self.app_name == "astronomy_shop": + self.app = AstronomyShop() + self.env_var = "CART_ADDR" + self.env_var_value = "cart:8080" + self.root_cause = ( + f"The deployment `{self.faulty_service}` is missing the environment variable `{self.env_var}`." 
+ ) + else: + raise ValueError(f"Unsupported app name: {app_name}") + + super().__init__(app=self.app, namespace=self.app.namespace) + + self.kubectl = KubeCtl() + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + + self.app.create_workload() + self.mitigation_oracle = MissingEnvVariableMitigationOracle(problem=self) + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection ==") + injector = ApplicationFaultInjector(namespace=self.namespace) + injector.inject_missing_env_variable( + deployment_name=self.faulty_service, + env_var=self.env_var, + ) + + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}") + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + injector = ApplicationFaultInjector(namespace=self.namespace) + injector.recover_missing_env_variable( + deployment_name=self.faulty_service, + env_var=self.env_var, + env_value=self.env_var_value, + ) + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}") diff --git a/sregym/conductor/problems/missing_service.py b/sregym/conductor/problems/missing_service.py new file mode 100644 index 0000000..283f933 --- /dev/null +++ b/sregym/conductor/problems/missing_service.py @@ -0,0 +1,54 @@ +from sregym.conductor.oracles.compound import CompoundedOracle +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.oracles.mitigation import MitigationOracle +from sregym.conductor.oracles.workload import WorkloadOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_virtual import VirtualizationFaultInjector +from sregym.service.apps.astronomy_shop import AstronomyShop +from sregym.service.apps.hotel_reservation import HotelReservation +from sregym.service.apps.social_network import SocialNetwork +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class MissingService(Problem): + def __init__(self, app_name: str = "hotel_reservation", faulty_service: str = "frontend"): + self.app_name = app_name + self.faulty_service = faulty_service + + if self.app_name == "hotel_reservation": + self.app = HotelReservation() + elif self.app_name == "social_network": + self.app = SocialNetwork() + elif self.app_name == "astronomy_shop": + self.app = AstronomyShop() + else: + raise ValueError(f"Unsupported app_name: {app_name}") + + super().__init__(app=self.app, namespace=self.app.namespace) + self.kubectl = KubeCtl() + self.namespace = self.app.namespace + self.root_cause = f"The service `{self.faulty_service}` has been deleted, causing service discovery failures for dependent services." 
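+        # Deleting the Service object leaves the pods running; only service discovery breaks, so dependents fail while the target deployment itself still looks healthy.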
+ self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + self.app.create_workload() + self.mitigation_oracle = MitigationOracle(problem=self) + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection ==") + injector = VirtualizationFaultInjector(namespace=self.namespace) + injector._inject( + fault_type="missing_service", + microservices=[self.faulty_service], + ) + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}\n") + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + injector = VirtualizationFaultInjector(namespace=self.namespace) + injector._recover( + fault_type="missing_service", + microservices=[self.faulty_service], + ) + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}\n") diff --git a/sregym/conductor/problems/multiple_failures.py b/sregym/conductor/problems/multiple_failures.py new file mode 100644 index 0000000..c237d37 --- /dev/null +++ b/sregym/conductor/problems/multiple_failures.py @@ -0,0 +1,55 @@ +"""Simulating multiple failures in microservice applications, implemented by composing multiple single-fault problems.""" + +import time + +from sregym.conductor.oracles.compound import CompoundedOracle +from sregym.conductor.oracles.mitigation import MitigationOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_virtual import VirtualizationFaultInjector +from sregym.service.apps.composite_app import CompositeApp +from sregym.service.apps.social_network import SocialNetwork +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class MultipleIndependentFailures(Problem): + def __init__(self, problems: list[Problem]): + self.problems = problems + apps = [p.app for p in problems] + self.app = CompositeApp(apps) + self.namespaces = [p.namespace for p in problems] + self.fault_injected = False + + # === Attaching problem's oracles === + diagnosis_oracles = [p.diagnosis_oracle for p in self.problems] + if len(diagnosis_oracles) > 0: + print(f"[MIF] Diagnosis oracles: {diagnosis_oracles}") + self.diagnosis_oracle = CompoundedOracle(self, *diagnosis_oracles) + + mitigation_oracles = [p.mitigation_oracle for p in self.problems] + if len(mitigation_oracles) > 0: + print(f"[MIF] Mitigation oracles: {mitigation_oracles}") + self.mitigation_oracle = CompoundedOracle(self, *mitigation_oracles) + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection ==") + for p in self.problems: + print(f"Injecting Fault: {p.__class__.__name__} | Namespace: {p.namespace}") + p.inject_fault() + time.sleep(1) + self.faults_str = " | ".join([f"{p.__class__.__name__}" for p in self.problems]) + print( + f"Injecting Fault: Multiple faults from included problems: [{self.faults_str}] | Namespace: {self.namespaces}\n" + ) + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + for p in self.problems: + print(f"Recovering Fault: {p.__class__.__name__} | Namespace: {p.namespace}") + p.recover_fault() + time.sleep(1) + print( + f"Recovering Fault: Multiple faults from included problems: [{self.faults_str}] | Namespace: {self.namespaces}\n" + ) diff --git a/sregym/conductor/problems/namespace_memory_limit.py b/sregym/conductor/problems/namespace_memory_limit.py new file mode 100644 index 0000000..b6e2197 --- /dev/null +++ b/sregym/conductor/problems/namespace_memory_limit.py @@ -0,0 +1,36 @@ +from 
sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.oracles.namespace_memory_limit_mitigation import NamespaceMemoryLimitMitigationOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_virtual import VirtualizationFaultInjector +from sregym.service.apps.hotel_reservation import HotelReservation +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class NamespaceMemoryLimit(Problem): + def __init__(self): + self.app = HotelReservation() + self.kubectl = KubeCtl() + self.namespace = self.app.namespace + self.faulty_service = "search" + self.injector = VirtualizationFaultInjector(namespace=self.namespace) + self.root_cause = f"The namespace has a ResourceQuota with a memory limit (1Gi) that is too restrictive, preventing the deployment `{self.faulty_service}` from scheduling new pods or causing existing pods to be evicted." + super().__init__(app=self.app, namespace=self.app.namespace) + + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + self.mitigation_oracle = NamespaceMemoryLimitMitigationOracle(problem=self) + + self.app.create_workload() + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection ==") + self.injector.inject_namespace_memory_limit( + deployment_name=self.faulty_service, namespace=self.namespace, memory_limit="1Gi" + ) + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}\n") + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + self.injector.recover_namespace_memory_limit(deployment_name=self.faulty_service, namespace=self.namespace) diff --git a/sregym/conductor/problems/network_policy_block.py b/sregym/conductor/problems/network_policy_block.py new file mode 100644 index 0000000..d4888db --- /dev/null +++ b/sregym/conductor/problems/network_policy_block.py @@ -0,0 +1,50 @@ +from kubernetes import client, config + +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.oracles.network_policy_oracle import NetworkPolicyMitigationOracle +from sregym.conductor.problems.base import Problem +from sregym.paths import TARGET_MICROSERVICES +from sregym.service.apps.hotel_reservation import HotelReservation +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class NetworkPolicyBlock(Problem): + def __init__(self, faulty_service="payment-service"): + self.app = HotelReservation() + self.kubectl = KubeCtl() + self.faulty_service = faulty_service + self.policy_name = f"deny-all-{faulty_service}" + + self.app.payload_script = ( + TARGET_MICROSERVICES / "hotelReservation/wrk2/scripts/hotel-reservation/mixed-workload_type_1.lua" + ) + self.app.create_workload() + + super().__init__(app=self.app, namespace=self.app.namespace) + self.root_cause = f"A NetworkPolicy `{self.policy_name}` is configured to block all ingress and egress traffic to/from pods labeled with `app={self.faulty_service}`, causing complete network isolation and service unavailability." 
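+        # Assumes the Kubernetes client configuration has already been loaded (e.g. by
+        # KubeCtl) before NetworkingV1Api() is constructed; no config.load_kube_config() here.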
+ self.networking_v1 = client.NetworkingV1Api() + + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + self.mitigation_oracle = NetworkPolicyMitigationOracle(problem=self) + + @mark_fault_injected + def inject_fault(self): + """Block ALL traffic to/from the target service""" + policy = { + "apiVersion": "networking.k8s.io/v1", + "kind": "NetworkPolicy", + "metadata": {"name": self.policy_name, "namespace": self.namespace}, + "spec": { + "podSelector": {"matchLabels": {"app": self.faulty_service}}, + "policyTypes": ["Ingress", "Egress"], + "ingress": [], + "egress": [], + }, + } + self.networking_v1.create_namespaced_network_policy(namespace=self.namespace, body=policy) + + @mark_fault_injected + def recover_fault(self): + """Remove the NetworkPolicy""" + self.networking_v1.delete_namespaced_network_policy(name=self.policy_name, namespace=self.namespace) diff --git a/sregym/conductor/problems/operator_misoperation/IMPORTANT.md b/sregym/conductor/problems/operator_misoperation/IMPORTANT.md new file mode 100644 index 0000000..ed356e5 --- /dev/null +++ b/sregym/conductor/problems/operator_misoperation/IMPORTANT.md @@ -0,0 +1,2 @@ +These problems are still implemented the old way, so we need to port them to the new format before we +can test them again. \ No newline at end of file diff --git a/sregym/conductor/problems/operator_misoperation/__init__.py b/sregym/conductor/problems/operator_misoperation/__init__.py new file mode 100644 index 0000000..d6cd490 --- /dev/null +++ b/sregym/conductor/problems/operator_misoperation/__init__.py @@ -0,0 +1,5 @@ +from .invalid_affinity_toleration import K8SOperatorInvalidAffinityTolerationFault +from .non_existent_storage import K8SOperatorNonExistentStorageFault +from .overload_replicas import K8SOperatorOverloadReplicasFault +from .security_context_fault import K8SOperatorSecurityContextFault +from .wrong_update_strategy import K8SOperatorWrongUpdateStrategyFault diff --git a/sregym/conductor/problems/operator_misoperation/invalid_affinity_toleration.py b/sregym/conductor/problems/operator_misoperation/invalid_affinity_toleration.py new file mode 100644 index 0000000..70b65fa --- /dev/null +++ b/sregym/conductor/problems/operator_misoperation/invalid_affinity_toleration.py @@ -0,0 +1,46 @@ +""" +This misoperation specifies an invalid toleration effect. +""" + +import time +from datetime import datetime, timedelta +from typing import Any + +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.oracles.operator_misoperation.invalid_affinity_mitigation import InvalidAffinityMitigationOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_operator import K8SOperatorFaultInjector +from sregym.paths import TARGET_MICROSERVICES +from sregym.service.apps.fleet_cast import FleetCast +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class K8SOperatorInvalidAffinityTolerationFault(Problem): + def __init__(self, faulty_service="tidb-app"): + app = FleetCast() + print("App's namespace:", app.namespace) + super().__init__(app=app, namespace="tidb-cluster") + self.faulty_service = faulty_service + self.kubectl = KubeCtl() + self.root_cause = "The TiDBCluster custom resource specifies an invalid toleration effect, causing pods to be unschedulable and remain in Pending state." 
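+        # Note: this Problem is pinned to the operator's "tidb-cluster" namespace in the
+        # super().__init__ call above, rather than to FleetCast's own app.namespace.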
+        self.app.create_workload()
+
+        self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause)
+
+        self.mitigation_oracle = InvalidAffinityMitigationOracle(problem=self, deployment_name="basic")
+
+    @mark_fault_injected
+    def inject_fault(self):
+        print("== Fault Injection ==")
+        injector = K8SOperatorFaultInjector(namespace="tidb-cluster")
+        injector.inject_invalid_affinity_toleration()
+        print(f"[FAULT INJECTED] {self.faulty_service} invalid affinity toleration failure\n")
+
+    @mark_fault_injected
+    def recover_fault(self):
+        print("== Fault Recovery ==")
+
+        injector = K8SOperatorFaultInjector(namespace="tidb-cluster")
+        injector.recover_invalid_affinity_toleration()
+        print(f"[FAULT RECOVERED] {self.faulty_service} invalid affinity toleration failure\n")
diff --git a/sregym/conductor/problems/operator_misoperation/non_existent_storage.py b/sregym/conductor/problems/operator_misoperation/non_existent_storage.py
new file mode 100644
index 0000000..978d373
--- /dev/null
+++ b/sregym/conductor/problems/operator_misoperation/non_existent_storage.py
@@ -0,0 +1,44 @@
+"""
+This fault specifies a non-existent storage class.
+"""
+
+import time
+from datetime import datetime, timedelta
+from typing import Any
+
+from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle
+from sregym.conductor.oracles.operator_misoperation.non_existent_storage_mitigation import (
+    NonExistentStorageClassMitigationOracle,
+)
+from sregym.conductor.problems.base import Problem
+from sregym.generators.fault.inject_operator import K8SOperatorFaultInjector
+from sregym.paths import TARGET_MICROSERVICES
+from sregym.service.apps.fleet_cast import FleetCast
+from sregym.service.kubectl import KubeCtl
+from sregym.utils.decorators import mark_fault_injected
+
+
+class K8SOperatorNonExistentStorageFault(Problem):
+    def __init__(self, faulty_service="tidb-app"):
+        app = FleetCast()
+        super().__init__(app=app, namespace="tidb-cluster")
+        self.faulty_service = faulty_service
+        self.kubectl = KubeCtl()
+        self.problem_id = "operator_non_existent_storage"
+        self.root_cause = "The TiDBCluster custom resource specifies a non-existent StorageClass, causing PVC creation to fail and pods to remain in Pending state." 
+ self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + self.mitigation_oracle = NonExistentStorageClassMitigationOracle(problem=self, deployment_name="basic") + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection ==") + injector = K8SOperatorFaultInjector(namespace="tidb-cluster") + injector.inject_non_existent_storage() + print(f"[FAULT INJECTED] {self.faulty_service} non-existent storage failure\n") + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + injector = K8SOperatorFaultInjector(namespace="tidb-cluster") + injector.recover_non_existent_storage() + print(f"[FAULT RECOVERED] {self.faulty_service} non-existent storage failure\n") diff --git a/sregym/conductor/problems/operator_misoperation/overload_replicas.py b/sregym/conductor/problems/operator_misoperation/overload_replicas.py new file mode 100644 index 0000000..8755774 --- /dev/null +++ b/sregym/conductor/problems/operator_misoperation/overload_replicas.py @@ -0,0 +1,45 @@ +# Ramifications: The TiDB cluster can become unhealthy: +# $ kubectl get events -n tidb-cluster +# 10m Warning Unhealthy pod/basic-tidb-0 Readiness probe failed: dial tcp 10.244.0.27:4000: connect: connection refused + +# Only a few pods (e.g., 4 out of 100,000 replicas requested) are created successfully. + + +import time +from datetime import datetime, timedelta +from typing import Any + +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.oracles.operator_misoperation.overload_replicas_mitigation import OverloadReplicasMitigationOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_operator import K8SOperatorFaultInjector +from sregym.paths import TARGET_MICROSERVICES +from sregym.service.apps.fleet_cast import FleetCast +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class K8SOperatorOverloadReplicasFault(Problem): + def __init__(self, faulty_service="tidb-app"): + app = FleetCast() + super().__init__(app=app, namespace="tidb-cluster") + self.faulty_service = faulty_service + self.kubectl = KubeCtl() + self.root_cause = "The TiDBCluster custom resource is configured with an excessive number of replicas (100,000), overwhelming the cluster and causing only a few pods to be created successfully." + self.app.create_workload() + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + self.mitigation_oracle = OverloadReplicasMitigationOracle(problem=self, deployment_name="basic") + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection ==") + injector = K8SOperatorFaultInjector(namespace="tidb-cluster") + injector.inject_overload_replicas() + print(f"[FAULT INJECTED] {self.faulty_service} overload replica failure\n") + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + injector = K8SOperatorFaultInjector(namespace="tidb-cluster") + injector.recover_overload_replicas() + print(f"[FAULT RECOVERED] {self.faulty_service} overload replica failure\n") diff --git a/sregym/conductor/problems/operator_misoperation/security_context_fault.py b/sregym/conductor/problems/operator_misoperation/security_context_fault.py new file mode 100644 index 0000000..4a74385 --- /dev/null +++ b/sregym/conductor/problems/operator_misoperation/security_context_fault.py @@ -0,0 +1,40 @@ +""" +The fault sets an invalid runAsUser value. 
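+Per the recorded root cause, affected pods either fail to start or are rejected by the security policy.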
+""" + +import time +from datetime import datetime, timedelta +from typing import Any + +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.oracles.operator_misoperation.security_context_mitigation import SecurityContextMitigationOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_operator import K8SOperatorFaultInjector +from sregym.paths import TARGET_MICROSERVICES +from sregym.service.apps.fleet_cast import FleetCast +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class K8SOperatorSecurityContextFault(Problem): + def __init__(self, faulty_service="tidb-app"): + app = FleetCast() + super().__init__(app=app, namespace="tidb-cluster") + self.faulty_service = faulty_service + self.kubectl = KubeCtl() + self.root_cause = "The TiDBCluster custom resource specifies an invalid runAsUser value in the security context, causing pods to fail to start or be rejected by the security policy." + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + self.mitigation_oracle = SecurityContextMitigationOracle(problem=self, deployment_name="basic") + self.app.create_workload() + + @mark_fault_injected + def inject_fault(self): + injector = K8SOperatorFaultInjector(namespace=self.namespace) + injector.inject_security_context_fault() + print(f"[FAULT INJECTED] {self.faulty_service} security context misconfigured") + + @mark_fault_injected + def recover_fault(self): + injector = K8SOperatorFaultInjector(namespace=self.namespace) + injector.recover_security_context_fault() + print(f"[FAULT RECOVERED] {self.faulty_service}") diff --git a/sregym/conductor/problems/operator_misoperation/wrong_update_strategy.py b/sregym/conductor/problems/operator_misoperation/wrong_update_strategy.py new file mode 100644 index 0000000..f008ff7 --- /dev/null +++ b/sregym/conductor/problems/operator_misoperation/wrong_update_strategy.py @@ -0,0 +1,44 @@ +""" +This fault specifies an invalid update strategy. +""" + +import time +from datetime import datetime, timedelta +from typing import Any + +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.oracles.operator_misoperation.wrong_update_strategy_mitigation import ( + WrongUpdateStrategyMitigationOracle, +) +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_operator import K8SOperatorFaultInjector +from sregym.paths import TARGET_MICROSERVICES +from sregym.service.apps.fleet_cast import FleetCast +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class K8SOperatorWrongUpdateStrategyFault(Problem): + def __init__(self, faulty_service="tidb-app"): + app = FleetCast() + super().__init__(app=app, namespace="tidb-cluster") + self.faulty_service = faulty_service + self.kubectl = KubeCtl() + self.root_cause = "The TiDBCluster custom resource specifies an invalid update strategy, causing deployment updates to fail or get stuck." 
+ self.app.create_workload() + + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + self.mitigation_oracle = WrongUpdateStrategyMitigationOracle(problem=self, deployment_name="basic") + + @mark_fault_injected + def inject_fault(self): + injector = K8SOperatorFaultInjector(namespace=self.namespace) + injector.inject_wrong_update_strategy() + print(f"[FAULT INJECTED] {self.faulty_service} wrong update strategy failure") + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + injector = K8SOperatorFaultInjector(namespace=self.namespace) + injector.recover_wrong_update_strategy() + print(f"[FAULT RECOVERED] {self.faulty_service} wrong update strategy failure\n") diff --git a/sregym/conductor/problems/payment_service_failure.py b/sregym/conductor/problems/payment_service_failure.py new file mode 100644 index 0000000..e75152f --- /dev/null +++ b/sregym/conductor/problems/payment_service_failure.py @@ -0,0 +1,32 @@ +"""Otel demo paymentServiceFailure feature flag fault.""" + +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_otel import OtelFaultInjector +from sregym.service.apps.astronomy_shop import AstronomyShop +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class PaymentServiceFailure(Problem): + def __init__(self): + self.app = AstronomyShop() + self.kubectl = KubeCtl() + self.namespace = self.app.namespace + self.injector = OtelFaultInjector(namespace=self.namespace) + self.faulty_service = "payment" + self.root_cause = f"The `{self.faulty_service}` service has a feature flag enabled that causes it to fail, resulting in service unavailability." + super().__init__(app=self.app, namespace=self.app.namespace) + # === Attach evaluation oracles === + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection ==") + self.injector.inject_fault("paymentFailure") + print(f"Fault: paymentServiceFailure | Namespace: {self.namespace}\n") + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + self.injector.recover_fault("paymentFailure") diff --git a/sregym/conductor/problems/payment_service_unreachable.py b/sregym/conductor/problems/payment_service_unreachable.py new file mode 100644 index 0000000..7a9d312 --- /dev/null +++ b/sregym/conductor/problems/payment_service_unreachable.py @@ -0,0 +1,32 @@ +"""Otel demo paymentServiceUnreachable feature flag fault.""" + +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_otel import OtelFaultInjector +from sregym.service.apps.astronomy_shop import AstronomyShop +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class PaymentServiceUnreachable(Problem): + def __init__(self): + self.app = AstronomyShop() + self.kubectl = KubeCtl() + self.namespace = self.app.namespace + self.injector = OtelFaultInjector(namespace=self.namespace) + self.faulty_service = "checkout" + self.root_cause = f"The `{self.faulty_service}` service has a feature flag enabled that makes the payment service unreachable, causing checkout failures." 
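+        # The flag is exercised through the checkout service, which calls the payment
+        # service; that is why `checkout`, not `payment`, is marked as the faulty service.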
+ super().__init__(app=self.app, namespace=self.app.namespace) + # === Attach evaluation oracles === + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection ==") + self.injector.inject_fault("paymentUnreachable") + print(f"Fault: paymentServiceUnreachable | Namespace: {self.namespace}\n") + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + self.injector.recover_fault("paymentUnreachable") diff --git a/sregym/conductor/problems/persistent_volume_affinity_violation.py b/sregym/conductor/problems/persistent_volume_affinity_violation.py new file mode 100644 index 0000000..0a6589e --- /dev/null +++ b/sregym/conductor/problems/persistent_volume_affinity_violation.py @@ -0,0 +1,51 @@ +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.oracles.mitigation import MitigationOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_virtual import VirtualizationFaultInjector +from sregym.service.apps.app_registry import AppRegistry +from sregym.service.apps.hotel_reservation import HotelReservation +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class PersistentVolumeAffinityViolation(Problem): + def __init__(self, app_name: str = "Social Network", faulty_service: str = "user-service"): + self.apps = AppRegistry() + self.app = self.apps.get_app_instance(app_name) + self.kubectl = KubeCtl() + self.namespace = self.app.namespace + self.faulty_service = faulty_service + self.root_cause = f"The deployment `{self.faulty_service}` is configured with a PersistentVolume (temp-pv) that has node affinity to node A, but the deployment has a nodeSelector pointing to node B, causing a volume affinity violation and pods to remain in Pending state." 
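+        # Note: app_name is resolved through AppRegistry using a display-style name
+        # (e.g. "Social Network"), unlike the snake_case ids used by most other problems.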
+ super().__init__(app=self.app, namespace=self.app.namespace) + + # === Attach evaluation oracles === + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + + self.mitigation_oracle = MitigationOracle(problem=self) + + self.app.create_workload() + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection ==") + print("Injecting persistent volume affinity violation...") + + injector = VirtualizationFaultInjector(namespace=self.namespace) + injector._inject( + fault_type="persistent_volume_affinity_violation", + microservices=[self.faulty_service], + ) + + print(f"Expected effect: {self.faulty_service} pod should be stuck in Pending state") + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}\n") + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + injector = VirtualizationFaultInjector(namespace=self.namespace) + injector._recover( + fault_type="persistent_volume_affinity_violation", + microservices=[self.faulty_service], + ) + + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}\n") diff --git a/sregym/conductor/problems/pod_anti_affinity_deadlock.py b/sregym/conductor/problems/pod_anti_affinity_deadlock.py new file mode 100644 index 0000000..446b2e6 --- /dev/null +++ b/sregym/conductor/problems/pod_anti_affinity_deadlock.py @@ -0,0 +1,61 @@ +"""Pod Anti-Affinity Deadlock problem for microservice applications.""" + +import time + +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.oracles.mitigation import MitigationOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_virtual import VirtualizationFaultInjector +from sregym.service.apps.social_network import SocialNetwork +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class PodAntiAffinityDeadlock(Problem): + def __init__(self, faulty_service: str = "user-service"): + self.app = SocialNetwork() + self.kubectl = KubeCtl() + self.namespace = self.app.namespace + self.faulty_service = faulty_service + self.root_cause = f"The deployment `{self.faulty_service}` has strict pod anti-affinity rules (requiredDuringSchedulingIgnoredDuringExecution) that prevent multiple replicas from being scheduled on the same node, but with insufficient nodes, causing a scheduling deadlock where pods remain in Pending state." 
+        super().__init__(app=self.app, namespace=self.app.namespace)
+
+        # === Attach evaluation oracles ===
+        self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause)
+
+        # Create workload for evaluation
+        self.app.create_workload()
+        self.mitigation_oracle = MitigationOracle(problem=self)
+
+    @mark_fault_injected
+    def inject_fault(self):
+        print("== Fault Injection ==")
+        print("Creating Pod Anti-Affinity Deadlock...")
+        print("Setting requiredDuringScheduling anti-affinity that excludes all nodes")
+
+        injector = VirtualizationFaultInjector(namespace=self.namespace)
+        injector._inject(
+            fault_type="pod_anti_affinity_deadlock",
+            microservices=[self.faulty_service],
+        )
+
+        # Wait for the deadlock to manifest
+        time.sleep(30)
+
+        print(f"Expected effect: Pods should be in Pending state with:")
+        print(f"  '0/X nodes are available: X node(s) didn't match pod anti-affinity rules'")
+        print(f"Service: {self.faulty_service} | Namespace: {self.namespace}\n")
+
+    @mark_fault_injected
+    def recover_fault(self):
+        print("== Fault Recovery ==")
+        print("Removing pod anti-affinity deadlock...")
+        print("Changing requiredDuring to preferredDuring or removing anti-affinity rules")
+
+        injector = VirtualizationFaultInjector(namespace=self.namespace)
+        injector._recover(
+            fault_type="pod_anti_affinity_deadlock",
+            microservices=[self.faulty_service],
+        )
+
+        print(f"Service: {self.faulty_service} | Namespace: {self.namespace}\n")
diff --git a/sregym/conductor/problems/postgres.md b/sregym/conductor/problems/postgres.md
new file mode 100644
index 0000000..5c14854
--- /dev/null
+++ b/sregym/conductor/problems/postgres.md
@@ -0,0 +1,54 @@
+
+# PostgreSQL 17.5 Installation Guide (Mac Homebrew)
+Follow these steps to **build PostgreSQL 17.5 from source** on macOS.
+
+## **Step 0. Install dependencies**
+brew install gcc make flex bison readline zlib
+brew install icu4c
+export PKG_CONFIG_PATH="$(brew --prefix icu4c)/lib/pkgconfig"
+
+## **Step 1. Download PostgreSQL source**
+
+curl -OLv https://ftp.postgresql.org/pub/source/v17.5/postgresql-17.5.tar.gz
+
+## **Step 2. Extract the tarball**
+tar xzf postgresql-17.5.tar.gz
+cd postgresql-17.5
+
+## **Step 3. Configure the build**
+./configure
+
+## **Step 4. Build PostgreSQL**
+make
+
+## **Step 5. Initialize the database cluster**
+export PATH=$HOME/pgsql/bin:$PATH
+initdb -D $HOME/pgsql/data
+
+## **Step 6. Start the PostgreSQL server**
+pg_ctl -D $HOME/pgsql/data -l logfile start
+
+## **Step 7. Create a test database**
+createdb test
+
+## **Step 8. 
Connect to your test database** +psql test + +## All of the commands : + +``` +brew install gcc make flex bison readline zlib +brew install icu4c +export PKG_CONFIG_PATH="$(brew --prefix icu4c)/lib/pkgconfig" +curl -OLv https://ftp.postgresql.org/pub/source/v17.5/postgresql-17.5.tar.gz +tar xzf postgresql-17.5.tar.gz +cd postgresql-17.5 +./configure +make +export PATH=$HOME/pgsql/bin:$PATH +initdb -D $HOME/pgsql/data +pg_ctl -D $HOME/pgsql/data -l logfile start +createdb test +psql test + +``` diff --git a/sregym/conductor/problems/product_catalog_failure.py b/sregym/conductor/problems/product_catalog_failure.py new file mode 100644 index 0000000..2170cdd --- /dev/null +++ b/sregym/conductor/problems/product_catalog_failure.py @@ -0,0 +1,32 @@ +"""Otel demo productCatalogFailure feature flag fault.""" + +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_otel import OtelFaultInjector +from sregym.service.apps.astronomy_shop import AstronomyShop +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class ProductCatalogServiceFailure(Problem): + def __init__(self): + self.app = AstronomyShop() + self.kubectl = KubeCtl() + self.namespace = self.app.namespace + self.injector = OtelFaultInjector(namespace=self.namespace) + self.faulty_service = "product-catalog" + self.root_cause = f"The `{self.faulty_service}` service has a feature flag enabled that causes it to fail, resulting in service unavailability." + super().__init__(app=self.app, namespace=self.app.namespace) + # === Attach evaluation oracles === + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection ==") + self.injector.inject_fault("productCatalogFailure") + print(f"Fault: productCatalogFailure | Namespace: {self.namespace}\n") + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + self.injector.recover_fault("productCatalogFailure") diff --git a/sregym/conductor/problems/pvc_claim_mismatch.py b/sregym/conductor/problems/pvc_claim_mismatch.py new file mode 100644 index 0000000..1147a94 --- /dev/null +++ b/sregym/conductor/problems/pvc_claim_mismatch.py @@ -0,0 +1,46 @@ +"""Redeployment of the HotelReservation application but do not handle PV.""" + +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.oracles.mitigation import MitigationOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_virtual import VirtualizationFaultInjector +from sregym.paths import TARGET_MICROSERVICES +from sregym.service.apps.hotel_reservation import HotelReservation +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class PVCClaimMismatch(Problem): + def __init__(self): + self.app = HotelReservation() + self.kubectl = KubeCtl() + self.namespace = self.app.namespace + self.app.payload_script = ( + TARGET_MICROSERVICES / "hotelReservation/wrk2/scripts/hotel-reservation/mixed-workload_type_1.lua" + ) + self.faulty_service = [ + "mongodb-geo", + "mongodb-profile", + "mongodb-rate", + "mongodb-recommendation", + "mongodb-reservation", + "mongodb-user", + ] + self.injector = VirtualizationFaultInjector(namespace=self.namespace) + self.root_cause = "Multiple MongoDB deployments are configured 
with PVC claim names that do not exist (claimName-broken), causing pods to remain in Pending state." + super().__init__(app=self.app, namespace=self.app.namespace) + # === Attach evaluation oracles === + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + + self.app.create_workload() + self.mitigation_oracle = MitigationOracle(problem=self) + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection ==") + self.injector.inject_pvc_claim_mismatch(microservices=self.faulty_service) + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + self.injector.recover_pvc_claim_mismatch(microservices=self.faulty_service) diff --git a/sregym/conductor/problems/rbac_misconfiguration.py b/sregym/conductor/problems/rbac_misconfiguration.py new file mode 100644 index 0000000..89d5de0 --- /dev/null +++ b/sregym/conductor/problems/rbac_misconfiguration.py @@ -0,0 +1,45 @@ +from sregym.conductor.oracles.compound import CompoundedOracle +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.oracles.mitigation import MitigationOracle +from sregym.conductor.oracles.workload import WorkloadOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_virtual import VirtualizationFaultInjector +from sregym.service.apps.astronomy_shop import AstronomyShop +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class RBACMisconfiguration(Problem): + def __init__(self, app_name: str = "astronomy_shop", faulty_service: str = "frontend"): + if app_name == "astronomy_shop": + self.app = AstronomyShop() + else: + raise ValueError(f"Unsupported app name: {app_name}") + + self.kubectl = KubeCtl() + self.namespace = self.app.namespace + self.faulty_service = faulty_service + + super().__init__(app=self.app, namespace=self.app.namespace) + self.root_cause = f"The deployment `{self.faulty_service}` uses a ServiceAccount with a ClusterRole that lacks ConfigMap permissions, but an init container tries to access a ConfigMap, causing the init container to fail and pods to remain in Init state." 
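+        # Expected symptom: pods stuck in Init with RBAC "forbidden" errors in the init
+        # container logs; a typical mitigation grants the missing ConfigMap permissions.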
+ + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + + self.app.create_workload() + self.mitigation_oracle = MitigationOracle(problem=self) + + # self.mitigation_oracle = CompoundedOracle(self, WorkloadOracle(problem=self, wrk_manager=self.app.wrk)) + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection: RBAC Init Container Misconfiguration ==") + injector = VirtualizationFaultInjector(namespace=self.namespace) + injector._inject(fault_type="rbac_misconfiguration", microservices=[self.faulty_service]) + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}\n") + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery: RBAC Init Container Misconfiguration ==") + injector = VirtualizationFaultInjector(namespace=self.namespace) + injector._recover(fault_type="rbac_misconfiguration", microservices=[self.faulty_service]) + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}\n") diff --git a/sregym/conductor/problems/read_error.py b/sregym/conductor/problems/read_error.py new file mode 100644 index 0000000..4d2ad85 --- /dev/null +++ b/sregym/conductor/problems/read_error.py @@ -0,0 +1,52 @@ +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_hw import HWFaultInjector +from sregym.paths import TARGET_MICROSERVICES +from sregym.service.apps.hotel_reservation import HotelReservation +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class ReadError(Problem): + """ + Problem: inject syscall-level EIO (-5) failures into `read()` for all pods on a target node. + """ + + def __init__(self, target_node: str = None): + self.app = HotelReservation() + self.kubectl = KubeCtl() + self.namespace = self.app.namespace + self.injector = HWFaultInjector() + self.target_node = target_node + + # (Optional) pick a request mix payload + self.app.payload_script = ( + TARGET_MICROSERVICES / "hotelReservation/wrk2/scripts/hotel-reservation/mixed-workload_type_1.lua" + ) + + super().__init__(app=self.app, namespace=self.app.namespace) + self.root_cause = f"System call-level EIO (-5) failures are injected into `read()` operations for all pods on a target node, causing I/O errors and service failures." 
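+        # target_node may be None here; the injector is expected to pick a node at
+        # injection time, which is why the diagnosis oracle is attached in inject_fault().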
+ + self.app.create_workload() + + def requires_khaos(self) -> bool: + """This problem requires Khaos for eBPF-based fault injection.""" + return True + + @mark_fault_injected + def inject_fault(self): + print(f"== Fault Injection: read_error ==") + self.target_node = self.injector.inject_node(self.namespace, "read_error", self.target_node) + print(f"[debug] target_node: {self.target_node}") + # Setup diagnosis oracle here since we now have the target node + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + print(f"Injected read_error into pods on node {self.target_node}\n") + + @mark_fault_injected + def recover_fault(self): + print(f"== Fault Recovery: read_error on node {self.target_node} ==") + if self.target_node: + self.injector.recover_node(self.namespace, "read_error", self.target_node) + else: + print("[warn] No target node recorded; attempting best-effort recovery.") + print("Recovery request sent.\n") diff --git a/sregym/conductor/problems/readiness_probe_misconfiguration.py b/sregym/conductor/problems/readiness_probe_misconfiguration.py new file mode 100644 index 0000000..cdbf2ec --- /dev/null +++ b/sregym/conductor/problems/readiness_probe_misconfiguration.py @@ -0,0 +1,55 @@ +from sregym.conductor.oracles.compound import CompoundedOracle +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.oracles.mitigation import MitigationOracle +from sregym.conductor.oracles.workload import WorkloadOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_virtual import VirtualizationFaultInjector +from sregym.paths import TARGET_MICROSERVICES +from sregym.service.apps.astronomy_shop import AstronomyShop +from sregym.service.apps.hotel_reservation import HotelReservation +from sregym.service.apps.social_network import SocialNetwork +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class ReadinessProbeMisconfiguration(Problem): + def __init__(self, app_name="social_network", faulty_service="user-service"): + self.app_name = app_name + self.faulty_service = faulty_service + + if app_name == "social_network": + self.app = SocialNetwork() + elif app_name == "hotel_reservation": + self.app = HotelReservation() + elif app_name == "astronomy_shop": + self.app = AstronomyShop() + else: + raise ValueError(f"Unsupported app name: {app_name}") + + super().__init__(app=self.app, namespace=self.app.namespace) + + self.kubectl = KubeCtl() + self.root_cause = f"The deployment `{self.faulty_service}` has a misconfigured readiness probe pointing to a non-existent health endpoint (/healthz on port 8080), causing pods to never become ready and be excluded from service endpoints." 
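+        # Unlike a liveness failure, a failing readiness probe leaves pods Running but
+        # removes them from Service endpoints, so lost traffic is the primary symptom.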
+ self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + + self.app.create_workload() + self.mitigation_oracle = MitigationOracle(problem=self) + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection ==") + self.injector = VirtualizationFaultInjector(namespace=self.namespace) + self.injector._inject( + fault_type="readiness_probe_misconfiguration", + microservices=[self.faulty_service], + ) + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}\n") + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + self.injector._recover( + fault_type="readiness_probe_misconfiguration", + microservices=[self.faulty_service], + ) + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}\n") diff --git a/sregym/conductor/problems/recommendation_service_cache_failure.py b/sregym/conductor/problems/recommendation_service_cache_failure.py new file mode 100644 index 0000000..477b685 --- /dev/null +++ b/sregym/conductor/problems/recommendation_service_cache_failure.py @@ -0,0 +1,32 @@ +"""Otel demo recommendationServiceCacheFailure feature flag fault.""" + +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_otel import OtelFaultInjector +from sregym.service.apps.astronomy_shop import AstronomyShop +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class RecommendationServiceCacheFailure(Problem): + def __init__(self): + self.app = AstronomyShop() + self.kubectl = KubeCtl() + self.namespace = self.app.namespace + self.injector = OtelFaultInjector(namespace=self.namespace) + self.faulty_service = "recommendation" + self.root_cause = f"The `{self.faulty_service}` service has a feature flag enabled that causes cache failures, resulting in degraded service performance." 
+ super().__init__(app=self.app, namespace=self.app.namespace) + # === Attach evaluation oracles === + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection ==") + self.injector.inject_fault("recommendationCacheFailure") + print(f"Fault: recommendationServiceCacheFailure | Namespace: {self.namespace}\n") + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + self.injector.recover_fault("recommendationCacheFailure") diff --git a/sregym/conductor/problems/registry.py b/sregym/conductor/problems/registry.py new file mode 100644 index 0000000..ef4fcaf --- /dev/null +++ b/sregym/conductor/problems/registry.py @@ -0,0 +1,202 @@ +from sregym.conductor.problems.ad_service_failure import AdServiceFailure +from sregym.conductor.problems.ad_service_high_cpu import AdServiceHighCpu +from sregym.conductor.problems.ad_service_manual_gc import AdServiceManualGc +from sregym.conductor.problems.assign_non_existent_node import AssignNonExistentNode +from sregym.conductor.problems.auth_miss_mongodb import MongoDBAuthMissing +from sregym.conductor.problems.capacity_decrease_rpc_retry_storm import CapacityDecreaseRPCRetryStorm +from sregym.conductor.problems.cart_service_failure import CartServiceFailure +from sregym.conductor.problems.configmap_drift import ConfigMapDrift +from sregym.conductor.problems.duplicate_pvc_mounts import DuplicatePVCMounts +from sregym.conductor.problems.env_variable_shadowing import EnvVariableShadowing +from sregym.conductor.problems.faulty_image_correlated import FaultyImageCorrelated +from sregym.conductor.problems.gc_capacity_degradation import GCCapacityDegradation +from sregym.conductor.problems.image_slow_load import ImageSlowLoad +from sregym.conductor.problems.incorrect_image import IncorrectImage +from sregym.conductor.problems.incorrect_port_assignment import IncorrectPortAssignment +from sregym.conductor.problems.ingress_misroute import IngressMisroute +from sregym.conductor.problems.kafka_queue_problems import KafkaQueueProblems +from sregym.conductor.problems.kubelet_crash import KubeletCrash +from sregym.conductor.problems.latent_sector_error import LatentSectorError +from sregym.conductor.problems.liveness_probe_misconfiguration import LivenessProbeMisconfiguration +from sregym.conductor.problems.liveness_probe_too_aggressive import LivenessProbeTooAggressive +from sregym.conductor.problems.load_spike_rpc_retry_storm import LoadSpikeRPCRetryStorm +from sregym.conductor.problems.loadgenerator_flood_homepage import LoadGeneratorFloodHomepage +from sregym.conductor.problems.misconfig_app import MisconfigAppHotelRes +from sregym.conductor.problems.missing_configmap import MissingConfigMap +from sregym.conductor.problems.missing_env_variable import MissingEnvVariable +from sregym.conductor.problems.missing_service import MissingService +from sregym.conductor.problems.multiple_failures import MultipleIndependentFailures +from sregym.conductor.problems.namespace_memory_limit import NamespaceMemoryLimit +from sregym.conductor.problems.network_policy_block import NetworkPolicyBlock +from sregym.conductor.problems.operator_misoperation.invalid_affinity_toleration import ( + K8SOperatorInvalidAffinityTolerationFault, +) +from sregym.conductor.problems.operator_misoperation.non_existent_storage import K8SOperatorNonExistentStorageFault +from sregym.conductor.problems.operator_misoperation.overload_replicas import 
K8SOperatorOverloadReplicasFault +from sregym.conductor.problems.operator_misoperation.security_context_fault import K8SOperatorSecurityContextFault +from sregym.conductor.problems.operator_misoperation.wrong_update_strategy import K8SOperatorWrongUpdateStrategyFault +from sregym.conductor.problems.payment_service_failure import PaymentServiceFailure +from sregym.conductor.problems.payment_service_unreachable import PaymentServiceUnreachable +from sregym.conductor.problems.persistent_volume_affinity_violation import PersistentVolumeAffinityViolation +from sregym.conductor.problems.pod_anti_affinity_deadlock import PodAntiAffinityDeadlock +from sregym.conductor.problems.product_catalog_failure import ProductCatalogServiceFailure +from sregym.conductor.problems.pvc_claim_mismatch import PVCClaimMismatch +from sregym.conductor.problems.rbac_misconfiguration import RBACMisconfiguration +from sregym.conductor.problems.read_error import ReadError +from sregym.conductor.problems.readiness_probe_misconfiguration import ReadinessProbeMisconfiguration +from sregym.conductor.problems.recommendation_service_cache_failure import RecommendationServiceCacheFailure +from sregym.conductor.problems.resource_request import ResourceRequestTooLarge, ResourceRequestTooSmall +from sregym.conductor.problems.revoke_auth import MongoDBRevokeAuth +from sregym.conductor.problems.rolling_update_misconfigured import RollingUpdateMisconfigured +from sregym.conductor.problems.scale_pod import ScalePodSocialNet +from sregym.conductor.problems.service_dns_resolution_failure import ServiceDNSResolutionFailure +from sregym.conductor.problems.sidecar_port_conflict import SidecarPortConflict +from sregym.conductor.problems.silent_data_corruption import SilentDataCorruption +from sregym.conductor.problems.stale_coredns_config import StaleCoreDNSConfig +from sregym.conductor.problems.storage_user_unregistered import MongoDBUserUnregistered +from sregym.conductor.problems.taint_no_toleration import TaintNoToleration +from sregym.conductor.problems.target_port import K8STargetPortMisconfig +from sregym.conductor.problems.train_ticket_f22 import TrainTicketF22 +from sregym.conductor.problems.trainticket_f17 import TrainTicketF17 +from sregym.conductor.problems.update_incompatible_correlated import UpdateIncompatibleCorrelated +from sregym.conductor.problems.valkey_auth_disruption import ValkeyAuthDisruption +from sregym.conductor.problems.valkey_memory_disruption import ValkeyMemoryDisruption +from sregym.conductor.problems.workload_imbalance import WorkloadImbalance +from sregym.conductor.problems.wrong_bin_usage import WrongBinUsage +from sregym.conductor.problems.wrong_dns_policy import WrongDNSPolicy +from sregym.conductor.problems.wrong_service_selector import WrongServiceSelector +from sregym.service.kubectl import KubeCtl + + +# fmt: off +class ProblemRegistry: + def __init__(self): + self.PROBLEM_REGISTRY = { + # ==================== APPLICATION FAULT INJECTOR ==================== + # --- CORRELATED PROBLEMS --- + "faulty_image_correlated": FaultyImageCorrelated, + "update_incompatible_correlated": UpdateIncompatibleCorrelated, + # --- REGULAR APPLICATION PROBLEMS --- + "incorrect_image": IncorrectImage, + "incorrect_port_assignment": IncorrectPortAssignment, + "misconfig_app_hotel_res": MisconfigAppHotelRes, + "missing_env_variable_astronomy_shop": lambda: MissingEnvVariable(app_name="astronomy_shop", faulty_service="frontend" ), + "revoke_auth_mongodb-1": lambda: MongoDBRevokeAuth(faulty_service="mongodb-geo"), + 
"revoke_auth_mongodb-2": lambda: MongoDBRevokeAuth(faulty_service="mongodb-rate"), + "storage_user_unregistered-1": lambda: MongoDBUserUnregistered(faulty_service="mongodb-geo"), + "storage_user_unregistered-2": lambda: MongoDBUserUnregistered(faulty_service="mongodb-rate"), + "valkey_auth_disruption": ValkeyAuthDisruption, + "valkey_memory_disruption": ValkeyMemoryDisruption, + # # ==================== VIRTUALIZATION FAULT INJECTOR ==================== + # --- METASTABLE FAILURES --- + "capacity_decrease_rpc_retry_storm": CapacityDecreaseRPCRetryStorm, + "gc_capacity_degradation": GCCapacityDegradation, + "load_spike_rpc_retry_storm": LoadSpikeRPCRetryStorm, + # --- REGULAR VIRTUALIZATION PROBLEMS --- + "assign_to_non_existent_node": AssignNonExistentNode, + "auth_miss_mongodb": MongoDBAuthMissing, + "configmap_drift_hotel_reservation": lambda: ConfigMapDrift(faulty_service="geo"), + "duplicate_pvc_mounts_astronomy_shop": lambda: DuplicatePVCMounts(app_name="astronomy_shop", faulty_service="frontend"), + "duplicate_pvc_mounts_hotel_reservation": lambda: DuplicatePVCMounts(app_name="hotel_reservation", faulty_service="frontend"), + "duplicate_pvc_mounts_social_network": lambda: DuplicatePVCMounts(app_name="social_network", faulty_service="jaeger"), + "env_variable_shadowing_astronomy_shop": lambda: EnvVariableShadowing(), + "k8s_target_port-misconfig": lambda: K8STargetPortMisconfig(faulty_service="user-service"), + "liveness_probe_misconfiguration_astronomy_shop": lambda: LivenessProbeMisconfiguration(app_name="astronomy_shop", faulty_service="frontend"), + "liveness_probe_misconfiguration_hotel_reservation": lambda: LivenessProbeMisconfiguration(app_name="hotel_reservation", faulty_service="recommendation"), + "liveness_probe_misconfiguration_social_network": lambda: LivenessProbeMisconfiguration(app_name="social_network", faulty_service="user-service"), + "liveness_probe_too_aggressive_astronomy_shop": lambda: LivenessProbeTooAggressive(app_name="astronomy_shop"), + "liveness_probe_too_aggressive_hotel_reservation": lambda: LivenessProbeTooAggressive(app_name="hotel_reservation"), + "liveness_probe_too_aggressive_social_network": lambda: LivenessProbeTooAggressive(app_name="social_network"), + "missing_configmap_hotel_reservation": lambda: MissingConfigMap(app_name="hotel_reservation", faulty_service="mongodb-geo"), + "missing_configmap_social_network": lambda: MissingConfigMap(app_name="social_network", faulty_service="media-mongodb"), + "missing_service_astronomy_shop": lambda: MissingService(app_name="astronomy_shop", faulty_service="ad"), + "missing_service_hotel_reservation": lambda: MissingService(app_name="hotel_reservation", faulty_service="mongodb-rate"), + "missing_service_social_network": lambda: MissingService(app_name="social_network", faulty_service="user-service"), + "namespace_memory_limit": NamespaceMemoryLimit, + "pod_anti_affinity_deadlock": PodAntiAffinityDeadlock, + "persistent_volume_affinity_violation": PersistentVolumeAffinityViolation, + "pvc_claim_mismatch": PVCClaimMismatch, + "rbac_misconfiguration": RBACMisconfiguration, + "readiness_probe_misconfiguration_astronomy_shop": lambda: ReadinessProbeMisconfiguration(app_name="astronomy_shop", faulty_service="frontend"), + "readiness_probe_misconfiguration_hotel_reservation": lambda: ReadinessProbeMisconfiguration(app_name="hotel_reservation", faulty_service="frontend"), + "readiness_probe_misconfiguration_social_network": lambda: ReadinessProbeMisconfiguration(app_name="social_network", 
faulty_service="user-service"), + "resource_request_too_large": lambda: ResourceRequestTooLarge(app_name="hotel_reservation", faulty_service="mongodb-rate"), + "resource_request_too_small": lambda: ResourceRequestTooSmall(app_name="hotel_reservation", faulty_service="mongodb-rate"), + "rolling_update_misconfigured_hotel_reservation": lambda: RollingUpdateMisconfigured(app_name="hotel_reservation"), + "rolling_update_misconfigured_social_network": lambda: RollingUpdateMisconfigured(app_name="social_network"), + "scale_pod_zero_social_net": ScalePodSocialNet, + "service_dns_resolution_failure_astronomy_shop": lambda: ServiceDNSResolutionFailure(app_name="astronomy_shop", faulty_service="frontend"), + "service_dns_resolution_failure_social_network": lambda: ServiceDNSResolutionFailure(app_name="social_network", faulty_service="user-service"), + "sidecar_port_conflict_astronomy_shop": lambda: SidecarPortConflict(app_name="astronomy_shop", faulty_service="frontend"), + "sidecar_port_conflict_hotel_reservation": lambda: SidecarPortConflict(app_name="hotel_reservation", faulty_service="frontend"), + "sidecar_port_conflict_social_network": lambda: SidecarPortConflict(app_name="social_network", faulty_service="user-service"), + "stale_coredns_config_astronomy_shop": lambda: StaleCoreDNSConfig(app_name="astronomy_shop"), + "stale_coredns_config_social_network": lambda: StaleCoreDNSConfig(app_name="social_network"), + "taint_no_toleration_social_network": lambda: TaintNoToleration(), + "wrong_bin_usage": WrongBinUsage, + "wrong_dns_policy_astronomy_shop": lambda: WrongDNSPolicy(app_name="astronomy_shop", faulty_service="frontend"), + "wrong_dns_policy_hotel_reservation": lambda: WrongDNSPolicy(app_name="hotel_reservation", faulty_service="profile"), + "wrong_dns_policy_social_network": lambda: WrongDNSPolicy(app_name="social_network", faulty_service="user-service"), + "wrong_service_selector_astronomy_shop": lambda: WrongServiceSelector(app_name="astronomy_shop", faulty_service="frontend"), + "wrong_service_selector_hotel_reservation": lambda: WrongServiceSelector(app_name="hotel_reservation", faulty_service="frontend"), + "wrong_service_selector_social_network": lambda: WrongServiceSelector(app_name="social_network", faulty_service="user-service"), + # ==================== OPENTELEMETRY FAULT INJECTOR ==================== + "astronomy_shop_ad_service_failure": AdServiceFailure, + "astronomy_shop_ad_service_high_cpu": AdServiceHighCpu, + "astronomy_shop_ad_service_manual_gc": AdServiceManualGc, + "astronomy_shop_cart_service_failure": CartServiceFailure, + "astronomy_shop_ad_service_image_slow_load": ImageSlowLoad, + "astronomy_shop_payment_service_failure": PaymentServiceFailure, + "astronomy_shop_payment_service_unreachable": PaymentServiceUnreachable, + "astronomy_shop_product_catalog_service_failure": ProductCatalogServiceFailure, + "astronomy_shop_recommendation_service_cache_failure": RecommendationServiceCacheFailure, + "kafka_queue_problems": KafkaQueueProblems, + "loadgenerator_flood_homepage": LoadGeneratorFloodHomepage, + # ==================== TRAIN TICKET FAULT INJECTOR ==================== + "trainticket_f17_nested_sql_select_clause_error": TrainTicketF17, + "trainticket_f22_sql_column_name_mismatch_error": TrainTicketF22, + # ==================== HARDWARE FAULT INJECTOR ==================== + "read_error": ReadError, + "latent_sector_error": LatentSectorError, + "silent_data_corruption": SilentDataCorruption, + # ==================== DIRECT K8S API ==================== + 
"ingress_misroute": lambda: IngressMisroute(path="/api", correct_service="frontend-service", wrong_service="recommendation-service"), + "network_policy_block": lambda: NetworkPolicyBlock(faulty_service="payment-service"), + # ==================== MULTIPLE INDEPENDENT FAILURES ==================== + "social_net_hotel_res_astro_shop_concurrent_failures": lambda: MultipleIndependentFailures(problems=[K8STargetPortMisconfig(faulty_service="user-service"),MongoDBRevokeAuth(faulty_service="mongodb-geo"),WrongServiceSelector(),]), + # ad hoc: + "kubelet_crash": KubeletCrash, + "workload_imbalance": WorkloadImbalance, + # ==================== K8S OPERATOR MISOPERATION ================== + "operator_overload_replicas": K8SOperatorOverloadReplicasFault, + "operator_non_existent_storage": K8SOperatorNonExistentStorageFault, + "operator_invalid_affinity_toleration": K8SOperatorInvalidAffinityTolerationFault, + "operator_security_context_fault": K8SOperatorSecurityContextFault, + "operator_wrong_update_strategy_fault": K8SOperatorWrongUpdateStrategyFault, + } +# fmt: on + self.kubectl = KubeCtl() + self.non_emulated_cluster_problems = [] + + def get_problem_instance(self, problem_id: str): + if problem_id not in self.PROBLEM_REGISTRY: + raise ValueError(f"Problem ID {problem_id} not found in registry.") + + is_emulated_cluster = self.kubectl.is_emulated_cluster() + if is_emulated_cluster and problem_id in self.non_emulated_cluster_problems: + raise RuntimeError(f"Problem ID {problem_id} is not supported in emulated clusters.") + + return self.PROBLEM_REGISTRY.get(problem_id)() + + def get_problem(self, problem_id: str): + return self.PROBLEM_REGISTRY.get(problem_id) + + def get_problem_ids(self, task_type: str = None): + if task_type: + return [k for k in self.PROBLEM_REGISTRY.keys() if task_type in k] + return list(self.PROBLEM_REGISTRY.keys()) + + def get_problem_count(self, task_type: str = None): + if task_type: + return len([k for k in self.PROBLEM_REGISTRY.keys() if task_type in k]) + return len(self.PROBLEM_REGISTRY) diff --git a/sregym/conductor/problems/resource_request.py b/sregym/conductor/problems/resource_request.py new file mode 100644 index 0000000..f6b7d12 --- /dev/null +++ b/sregym/conductor/problems/resource_request.py @@ -0,0 +1,91 @@ +import copy +from abc import abstractmethod + +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.oracles.mitigation import MitigationOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_virtual import VirtualizationFaultInjector +from sregym.service.apps.astronomy_shop import AstronomyShop +from sregym.service.apps.hotel_reservation import HotelReservation +from sregym.service.apps.social_network import SocialNetwork +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class ResourceRequest(Problem): + def __init__(self, app_name: str = "hotel_reservation", faulty_service: str = "frontend"): + self.app_name = app_name + self.faulty_service = faulty_service + + if self.app_name == "hotel_reservation": + self.app = HotelReservation() + elif self.app_name == "social_network": + self.app = SocialNetwork() + elif self.app_name == "astronomy_shop": + self.app = AstronomyShop() + else: + raise ValueError(f"Unsupported app_name: {app_name}") + + super().__init__(app=self.app, namespace=self.app.namespace) + self.kubectl = KubeCtl() + self.namespace = self.app.namespace + # Note: root_cause will be set 
in subclasses (ResourceRequestTooLarge/ResourceRequestTooSmall) + # diagnosis_oracle will be set in subclasses after root_cause is set + self.app.create_workload() + self.mitigation_oracle = MitigationOracle(problem=self) + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection ==") + injector = VirtualizationFaultInjector(namespace=self.namespace) + injector._inject( + fault_type="resource_request", + microservices=[self.faulty_service], + duration=self.set_memory_limit, # Not a duration + ) + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}\n") + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + injector = VirtualizationFaultInjector(namespace=self.namespace) + injector._recover( + fault_type="resource_request", + microservices=[self.faulty_service], + ) + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}\n") + + @abstractmethod + def set_memory_limit(self, deployment_yaml) -> dict: + pass + + +class ResourceRequestTooLarge(ResourceRequest): + def __init__(self, app_name: str = "hotel_reservation", faulty_service: str = "frontend"): + super().__init__(app_name, faulty_service) + self.root_cause = f"The deployment `{self.faulty_service}` has a memory request that exceeds the node's memory capacity, causing pods to be unschedulable and remain in Pending state." + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + + def set_memory_limit(self, deployment_yaml): + dyaml = copy.deepcopy(deployment_yaml) + upper_limit = self.kubectl.get_node_memory_capacity() + new_limit = self.kubectl.format_k8s_memory((upper_limit + 100 * 1024) * 2) + dyaml["spec"]["template"]["spec"]["containers"][0]["resources"]["requests"]["memory"] = new_limit + print(f"Setting memory request to {new_limit} for {self.faulty_service}") + return dyaml + + +class ResourceRequestTooSmall(ResourceRequest): + def __init__(self, app_name: str = "hotel_reservation", faulty_service: str = "frontend"): + super().__init__(app_name, faulty_service) + self.root_cause = f"The deployment `{self.faulty_service}` has a memory limit that is too small (10Mi), causing pods to be killed due to OOM (Out of Memory) errors." 
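+        # Contrast with ResourceRequestTooLarge: an oversized *request* keeps pods Pending
+        # (unschedulable), while an undersized *limit* lets pods schedule and then be OOMKilled.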
+ self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + + def set_memory_limit(self, deployment_yaml): + dyaml = copy.deepcopy(deployment_yaml) + new_limit = "10Mi" + dyaml["spec"]["template"]["spec"]["containers"][0]["resources"].setdefault("limits", dict())[ + "memory" + ] = new_limit + print(f"Setting memory limit to {new_limit} for {self.faulty_service}") + return dyaml diff --git a/sregym/conductor/problems/revoke_auth.py b/sregym/conductor/problems/revoke_auth.py new file mode 100644 index 0000000..c2a68f0 --- /dev/null +++ b/sregym/conductor/problems/revoke_auth.py @@ -0,0 +1,47 @@ +"""MongoDB revoke authentication problem in the HotelReservation application.""" + +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.oracles.mitigation import MitigationOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_app import ApplicationFaultInjector +from sregym.paths import TARGET_MICROSERVICES +from sregym.service.apps.hotel_reservation import HotelReservation +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class MongoDBRevokeAuth(Problem): + def __init__(self, faulty_service: str = "mongodb-geo"): + self.app = HotelReservation() + self.kubectl = KubeCtl() + self.namespace = self.app.namespace + self.faulty_service = faulty_service + self.root_cause = f"The service `{self.faulty_service}-db` is configured to revoke the access to the database for the service `{self.faulty_service}`." + # NOTE: change the faulty service to mongodb-rate to create another scenario + # self.faulty_service = "mongodb-rate" + self.app.payload_script = ( + TARGET_MICROSERVICES / "hotelReservation/wrk2/scripts/hotel-reservation/mixed-workload_type_1.lua" + ) + super().__init__(app=self.app, namespace=self.app.namespace) + # === Attach evaluation oracles === + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + + self.app.create_workload() + self.mitigation_oracle = MitigationOracle(problem=self) + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection ==") + injector = ApplicationFaultInjector(namespace=self.namespace) + injector._inject( + fault_type="revoke_auth", + microservices=[self.faulty_service], + ) + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}\n") + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + injector = ApplicationFaultInjector(namespace=self.namespace) + injector._recover(fault_type="revoke_auth", microservices=[self.faulty_service]) + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}\n") diff --git a/sregym/conductor/problems/rolling_update_misconfigured.py b/sregym/conductor/problems/rolling_update_misconfigured.py new file mode 100644 index 0000000..3579716 --- /dev/null +++ b/sregym/conductor/problems/rolling_update_misconfigured.py @@ -0,0 +1,45 @@ +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.oracles.rolling_update_misconfiguration_mitigation import RollingUpdateMitigationOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_virtual import VirtualizationFaultInjector +from sregym.service.apps.hotel_reservation import HotelReservation +from sregym.service.apps.social_network import SocialNetwork +from sregym.service.kubectl import KubeCtl +from 
sregym.utils.decorators import mark_fault_injected + + +class RollingUpdateMisconfigured(Problem): + def __init__(self, app_name: str = "social_network"): + self.faulty_service = "custom-service" + self.app_name = app_name + + if self.app_name == "social_network": + self.app = SocialNetwork() + elif self.app_name == "hotel_reservation": + self.app = HotelReservation() + else: + raise ValueError(f"Unsupported app name: {app_name}") + + super().__init__(app=self.app, namespace=self.app.namespace) + + self.kubectl = KubeCtl() + self.root_cause = f"The deployment `{self.faulty_service}` has a misconfigured rolling update strategy (maxUnavailable=100%, maxSurge=0%) with an init container that hangs indefinitely, causing the deployment to be stuck during updates." + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + + self.app.create_workload() + self.mitigation_oracle = RollingUpdateMitigationOracle(problem=self, deployment_name=self.faulty_service) + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection ==") + injector = VirtualizationFaultInjector(namespace=self.namespace) + injector._inject(fault_type="rolling_update_misconfigured", microservices=[self.faulty_service]) + + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}") + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + injector = VirtualizationFaultInjector(namespace=self.namespace) + injector._recover(fault_type="rolling_update_misconfigured", microservices=[self.faulty_service]) + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}") diff --git a/sregym/conductor/problems/scale_pod.py b/sregym/conductor/problems/scale_pod.py new file mode 100644 index 0000000..7753955 --- /dev/null +++ b/sregym/conductor/problems/scale_pod.py @@ -0,0 +1,52 @@ +"""Scale pod replica to zero problem for the SocialNetwork application.""" + +import time + +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.oracles.scale_pod_zero_mitigation import ScalePodZeroMitigationOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_virtual import VirtualizationFaultInjector +from sregym.service.apps.social_network import SocialNetwork +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class ScalePodSocialNet(Problem): + def __init__(self): + self.app = SocialNetwork() + self.kubectl = KubeCtl() + self.namespace = self.app.namespace + # self.faulty_service = "url-shorten-mongodb" + self.faulty_service = "user-service" + # Choose a very front service to test - this will directly cause an exception + # TODO: We should create more problems with this using different faulty services + # self.faulty_service = "nginx-thrift" + self.root_cause = f"The deployment `{self.faulty_service}` is scaled down to 0 replicas, causing the service to be unavailable." 
+ super().__init__(app=self.app, namespace=self.app.namespace) + # === Attach evaluation oracles === + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + + self.app.create_workload() + self.mitigation_oracle = ScalePodZeroMitigationOracle(problem=self) + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection ==") + injector = VirtualizationFaultInjector(namespace=self.namespace) + injector._inject( + fault_type="scale_pods_to_zero", + microservices=[self.faulty_service], + ) + # Terminating the pod may take long time when scaling + time.sleep(30) + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}\n") + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + injector = VirtualizationFaultInjector(namespace=self.namespace) + injector._recover( + fault_type="scale_pods_to_zero", + microservices=[self.faulty_service], + ) + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}\n") diff --git a/sregym/conductor/problems/service_dns_resolution_failure.py b/sregym/conductor/problems/service_dns_resolution_failure.py new file mode 100644 index 0000000..5ea6b69 --- /dev/null +++ b/sregym/conductor/problems/service_dns_resolution_failure.py @@ -0,0 +1,54 @@ +from sregym.conductor.oracles.dns_resolution_mitigation import DNSResolutionMitigationOracle +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_virtual import VirtualizationFaultInjector +from sregym.service.apps.astronomy_shop import AstronomyShop +from sregym.service.apps.hotel_reservation import HotelReservation +from sregym.service.apps.social_network import SocialNetwork +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class ServiceDNSResolutionFailure(Problem): + def __init__(self, app_name="astronomy_shop", faulty_service="frontend"): + self.app_name = app_name + self.faulty_service = faulty_service + + if app_name == "social_network": + self.app = SocialNetwork() + elif app_name == "hotel_reservation": + self.app = HotelReservation() + elif app_name == "astronomy_shop": + self.app = AstronomyShop() + else: + raise ValueError(f"Unsupported app name: {app_name}") + + super().__init__(app=self.app, namespace=self.app.namespace) + + self.kubectl = KubeCtl() + self.root_cause = f"CoreDNS is configured with an NXDOMAIN template for the service `{self.faulty_service}.{self.namespace}.svc.cluster.local`, causing DNS resolution to fail for this service." 
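+        # Illustrative only: the fault corresponds to a CoreDNS `template` stanza roughly
+        # of the form (exact Corefile syntax is handled by the injector):
+        #   template IN ANY <faulty_service>.<namespace>.svc.cluster.local {
+        #       rcode NXDOMAIN
+        #   }
+        # so lookups of this one FQDN fail while other cluster DNS keeps working.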
+ + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + + self.app.create_workload() + self.mitigation_oracle = DNSResolutionMitigationOracle(problem=self) + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection ==") + self.injector = VirtualizationFaultInjector(namespace=self.namespace) + self.injector._inject( + fault_type="service_dns_resolution_failure", + microservices=[self.faulty_service], + ) + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}\n") + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + self.injector = VirtualizationFaultInjector(namespace=self.namespace) + self.injector._recover( + fault_type="service_dns_resolution_failure", + microservices=[self.faulty_service], + ) + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}\n") diff --git a/sregym/conductor/problems/sidecar_port_conflict.py b/sregym/conductor/problems/sidecar_port_conflict.py new file mode 100644 index 0000000..0826132 --- /dev/null +++ b/sregym/conductor/problems/sidecar_port_conflict.py @@ -0,0 +1,53 @@ +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.oracles.mitigation import MitigationOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_virtual import VirtualizationFaultInjector +from sregym.service.apps.astronomy_shop import AstronomyShop +from sregym.service.apps.hotel_reservation import HotelReservation +from sregym.service.apps.social_network import SocialNetwork +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class SidecarPortConflict(Problem): + def __init__(self, app_name: str = "astronomy_shop", faulty_service: str = "frontend"): + self.app_name = app_name + self.faulty_service = faulty_service + + if app_name == "social_network": + self.app = SocialNetwork() + elif app_name == "hotel_reservation": + self.app = HotelReservation() + elif app_name == "astronomy_shop": + self.app = AstronomyShop() + else: + raise ValueError(f"Unsupported app name: {app_name}") + + super().__init__(app=self.app, namespace=self.app.namespace) + + self.kubectl = KubeCtl() + self.root_cause = f"The deployment `{self.faulty_service}` has a sidecar container that binds to the same port as the main container, causing port conflicts and preventing the service from starting properly." 
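+        # Expected symptom: the injected sidecar binds the same containerPort as the main
+        # container, so one of them typically fails with an "address already in use" style
+        # error and the pod never becomes Ready (check `kubectl logs` on both containers).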
+ self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + + self.app.create_workload() + self.mitigation_oracle = MitigationOracle(problem=self) + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection ==") + injector = VirtualizationFaultInjector(namespace=self.namespace) + injector._inject( + fault_type="sidecar_port_conflict", + microservices=[self.faulty_service], + ) + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}\n") + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + injector = VirtualizationFaultInjector(namespace=self.namespace) + injector._recover( + fault_type="sidecar_port_conflict", + microservices=[self.faulty_service], + ) + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}\n") diff --git a/sregym/conductor/problems/silent_data_corruption.py b/sregym/conductor/problems/silent_data_corruption.py new file mode 100644 index 0000000..4ee4ccc --- /dev/null +++ b/sregym/conductor/problems/silent_data_corruption.py @@ -0,0 +1,249 @@ +from enum import StrEnum +import json +import time +from typing import Optional + +from sregym.conductor.oracles.compound import CompoundedOracle +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_kernel import KernelInjector +from sregym.service.apps.hotel_reservation import HotelReservation +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected +from sregym.conductor.oracles.localization import LocalizationOracle +from sregym.service.dm_flakey_manager import DM_FLAKEY_DEVICE_NAME, DmFlakeyManager +from sregym.conductor.oracles.workload import WorkloadOracle + + +class SilentDataCorruptionStrategy(StrEnum): + READ_CORRUPT = "read_corrupt" + WRITE_CORRUPT = "write_corrupt" + BOTH_CORRUPT = "both_corrupt" + + +class SilentDataCorruption(Problem): + + def __init__( + self, + target_deploy: str = "mongodb-geo", + namespace: str = "hotel-reservation", + strategy: SilentDataCorruptionStrategy = SilentDataCorruptionStrategy.BOTH_CORRUPT, + probability: int = 100, # (0-100)% probability + up_interval: int = 0, # Seconds device is healthy + down_interval: int = 1, # Seconds device corrupts data + ): + self.app = HotelReservation() + self.kubectl = KubeCtl() + self.namespace = namespace + self.deploy = target_deploy + self.injector = KernelInjector(self.kubectl) + self.dm_flakey_manager = DmFlakeyManager(self.kubectl) + self.target_node: Optional[str] = None + self.strategy = strategy + self.probability = probability + self.up_interval = up_interval + self.down_interval = down_interval + self.probability = self.probability * 10000000 # (0-1000000000 scale) for (0-100% probability) + + super().__init__(app=self.app, namespace=self.app.namespace) + + self.root_cause = "There's a silent data corruption on the hard drive that the mongodb-geo service's data is on." + + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + + self.app.create_workload() + + def requires_khaos(self) -> bool: + """This problem requires Khaos for dm-flakey infrastructure setup.""" + return True + + def _discover_node_for_deploy(self) -> Optional[str]: + """Return the node where the target deployment is running.""" + # First try with a label selector (common OpenEBS hotel-reservation pattern) + svc = self.deploy.split("-", 1)[-1] # e.g. 
"geo" + cmd = f"kubectl -n {self.namespace} get pods -l app=mongodb,component={svc} -o json" + out = self.kubectl.exec_command(cmd) + if isinstance(out, tuple): + out = out[0] + data = json.loads(out or "{}") + for item in data.get("items", []): + if item.get("status", {}).get("phase") == "Running": + return item["spec"]["nodeName"] + + # Fallback: search by pod name prefix + cmd = f"kubectl -n {self.namespace} get pods -o json" + out = self.kubectl.exec_command(cmd) + if isinstance(out, tuple): + out = out[0] + data = json.loads(out or "{}") + for item in data.get("items", []): + name = item["metadata"]["name"] + if name.startswith(self.deploy) and item.get("status", {}).get("phase") == "Running": + return item["spec"]["nodeName"] + + return None + + def _get_mongodb_pod(self) -> Optional[str]: + svc = self.deploy.split("-", 1)[-1] + cmd = f"kubectl -n {self.namespace} get pods -l app=mongodb,component={svc} -o jsonpath='{{.items[0].metadata.name}}'" + out = self.kubectl.exec_command(cmd) + if isinstance(out, tuple): + out = out[0] + pod_name = out.strip() if out else "" + if not pod_name or pod_name.startswith("error"): + cmd = f"kubectl -n {self.namespace} get pods -o json" + out = self.kubectl.exec_command(cmd) + if isinstance(out, tuple): + out = out[0] + data = json.loads(out or "{}") + for item in data.get("items", []): + name = item["metadata"]["name"] + if name.startswith(self.deploy) and item.get("status", {}).get("phase") == "Running": + return name + return pod_name if pod_name else None + + def _get_database_name(self) -> str: + svc = self.deploy.split("-", 1)[-1] + return f"{svc}-db" + + def mongo_write(self, hotel_id: str, lat: float, lon: float) -> bool: + pod_name = self._get_mongodb_pod() + if not pod_name: + return False + db_name = self._get_database_name() + collection = self.deploy.split("-", 1)[-1] + write_cmd = ( + f"kubectl -n {self.namespace} exec {pod_name} -- " + f"mongo {db_name} --eval " + f"'db.{collection}.insertOne({{hotelId: \"{hotel_id}\", lat: {lat}, lon: {lon}}})' " + f"--quiet --username admin --password admin --authenticationDatabase admin" + ) + try: + out = self.kubectl.exec_command(write_cmd) + fsync_cmd = ( + f"kubectl -n {self.namespace} exec {pod_name} -- " + f"mongo {db_name} --eval 'db.runCommand({{fsync: 1}})' " + f"--quiet --username admin --password admin --authenticationDatabase admin" + ) + self.kubectl.exec_command(fsync_cmd) + self.kubectl.exec_command(f"kubectl -n {self.namespace} exec {pod_name} -- sync") + return True + except Exception: + return False + + def mongo_read(self, hotel_id: str) -> Optional[dict]: + pod_name = self._get_mongodb_pod() + if not pod_name: + return None + db_name = self._get_database_name() + collection = self.deploy.split("-", 1)[-1] + read_cmd = ( + f"kubectl -n {self.namespace} exec {pod_name} -- " + f"mongo {db_name} --eval 'db.{collection}.findOne({{hotelId: \"{hotel_id}\"}})' " + f"--quiet --username admin --password admin --authenticationDatabase admin" + ) + try: + out = self.kubectl.exec_command(read_cmd) + except Exception: + return None + + def _get_corruption_features(self) -> str: + """ + Build the dm-flakey feature string based on strategy. 
+ Returns features like: "random_read_corrupt 500000000" or "random_read_corrupt 500000000 random_write_corrupt 500000000" + """ + features = [] + + if self.strategy == SilentDataCorruptionStrategy.READ_CORRUPT: + features.append(f"random_read_corrupt {self.probability}") + elif self.strategy == SilentDataCorruptionStrategy.WRITE_CORRUPT: + features.append(f"random_write_corrupt {self.probability}") + elif self.strategy == SilentDataCorruptionStrategy.BOTH_CORRUPT: + features.append(f"random_read_corrupt {self.probability}") + features.append(f"random_write_corrupt {self.probability}") + + return " ".join(features) + + @mark_fault_injected + def inject_fault(self): + print(f"[SDC] Starting silent data corruption injection for {self.deploy}") + + # Get target node where the deployment is running + self.target_node = self._discover_node_for_deploy() + if not self.target_node: + raise RuntimeError(f"Could not find running node for deployment {self.deploy}") + + print(f"[SDC] Target node: {self.target_node}") + print(f"[SDC] Strategy: {self.strategy}") + print(f"[SDC] Probability: {self.probability}/1000000000 ({self.probability/10000000:.1f}%)") + print(f"[SDC] Up interval: {self.up_interval}s, Down interval: {self.down_interval}s") + + # Get corruption features string + features = self._get_corruption_features() + print(f"[SDC] Features: {features}") + + # The dm-flakey device is already set up by DmFlakeyManager in Conductor + # We just need to configure it with corruption features + + print(f"[SDC] Configuring dm-flakey device for corruption...") + self.injector.dm_flakey_reload( + self.target_node, + DM_FLAKEY_DEVICE_NAME, + up_interval=self.up_interval, + down_interval=self.down_interval, + features=features + ) + + print(f"[SDC] Triggering MongoDB write and read to exercise corruption...") + import random + for _ in range(10): + test_id = "SDC_TRIGGER_"+str(random.randint(0, 10000)) + lat = 30 + random.randint(0, 10000)*0.0001 + lon = -120 + random.randint(0, 10000)*0.0001 + self.mongo_write(test_id, lat, lon) + self.injector.drop_caches(self.target_node, show_log=False) + self.mongo_read(test_id) + + print(f"[SDC] Silent data corruption injection complete") + if self.up_interval == 0: + print(f"[SDC] ⚠️ Device corruption is ALWAYS ACTIVE (no healthy intervals)") + else: + print(f"[SDC] Device will corrupt data for {self.down_interval}s every {self.up_interval + self.down_interval}s") + + @mark_fault_injected + def recover_fault(self): + print(f"[SDC] Starting recovery from silent data corruption") + + # Restore dm-flakey device to normal operation + if hasattr(self, "target_node") and self.target_node: + print(f"[SDC] Restoring dm-flakey device to normal operation on {self.target_node}") + self.injector.dm_flakey_reload( + self.target_node, + DM_FLAKEY_DEVICE_NAME, + up_interval=1, + down_interval=0, + features="" + ) + print(f"[SDC] ✅ dm-flakey device restored to normal operation") + + # Clean up and redeploy the app + self.app.cleanup() + + try: + cleanup_pods = self.kubectl.exec_command( + "kubectl get pods -n openebs --no-headers | grep 'cleanup-pvc-' | awk '{print $1}'" + ).strip() + if cleanup_pods: + pod_list = [p for p in cleanup_pods.splitlines() if p.strip()] + for pod in pod_list: + # Delete failed cleanup pods + self.kubectl.exec_command(f"kubectl delete pod -n openebs {pod} --ignore-not-found") + print(f"[SDC] Cleaned up {len(pod_list)} OpenEBS cleanup pod(s)") + except Exception as e: + print(f"[SDC] ⚠️ Warning: Failed to clean up OpenEBS cleanup pods: {e}") + + 
self.dm_flakey_manager.setup_openebs_dm_flakey_infrastructure() # This helps clean up any corrupted data on the affected storage directories + self.app.deploy() + self.app.start_workload() + + print(f"[SDC] ✅ Recovery complete - App restarted with clean state") diff --git a/sregym/conductor/problems/stale_coredns_config.py b/sregym/conductor/problems/stale_coredns_config.py new file mode 100644 index 0000000..9eab9dd --- /dev/null +++ b/sregym/conductor/problems/stale_coredns_config.py @@ -0,0 +1,54 @@ +from sregym.conductor.oracles.dns_resolution_mitigation import DNSResolutionMitigationOracle +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_virtual import VirtualizationFaultInjector +from sregym.service.apps.astronomy_shop import AstronomyShop +from sregym.service.apps.hotel_reservation import HotelReservation +from sregym.service.apps.social_network import SocialNetwork +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class StaleCoreDNSConfig(Problem): + def __init__(self, app_name="astronomy_shop"): + self.app_name = app_name + self.faulty_service = None + + if app_name == "social_network": + self.app = SocialNetwork() + elif app_name == "hotel_reservation": + self.app = HotelReservation() + elif app_name == "astronomy_shop": + self.app = AstronomyShop() + else: + raise ValueError(f"Unsupported app name: {app_name}") + + super().__init__(app=self.app, namespace=self.app.namespace) + + self.kubectl = KubeCtl() + self.root_cause = "CoreDNS is configured with a stale NXDOMAIN template for all .svc.cluster.local domains, causing DNS resolution to fail for all cluster-internal services." 
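+        # Illustrative only: unlike the per-service DNS fault, this stanza matches the whole
+        # cluster domain, roughly:
+        #   template IN ANY svc.cluster.local { rcode NXDOMAIN }
+        # so every cluster-internal lookup (any app, any namespace) returns NXDOMAIN.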
+ + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + + self.app.create_workload() + self.mitigation_oracle = DNSResolutionMitigationOracle(problem=self) + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection ==") + self.injector = VirtualizationFaultInjector(namespace=self.namespace) + self.injector._inject( + fault_type="stale_coredns_config", + microservices=None, + ) + print(f"Injected stale CoreDNS config | Namespace: {self.namespace}\n") + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + self.injector = VirtualizationFaultInjector(namespace=self.namespace) + self.injector._recover( + fault_type="stale_coredns_config", + microservices=None, + ) + print(f"Recovered from stale CoreDNS config | Namespace: {self.namespace}\n") diff --git a/sregym/conductor/problems/storage_user_unregistered.py b/sregym/conductor/problems/storage_user_unregistered.py new file mode 100644 index 0000000..2cac6eb --- /dev/null +++ b/sregym/conductor/problems/storage_user_unregistered.py @@ -0,0 +1,50 @@ +"""MongoDB storage user unregistered problem in the HotelReservation application.""" + +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.oracles.mitigation import MitigationOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_app import ApplicationFaultInjector +from sregym.paths import TARGET_MICROSERVICES +from sregym.service.apps.hotel_reservation import HotelReservation +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class MongoDBUserUnregistered(Problem): + def __init__(self, faulty_service: str = "mongodb-geo"): + self.app = HotelReservation() + self.kubectl = KubeCtl() + self.namespace = self.app.namespace + self.faulty_service = faulty_service + # NOTE: change the faulty service to mongodb-rate to create another scenario + # self.faulty_service = "mongodb-rate" + self.root_cause = f"The MongoDB service `{self.faulty_service}` has an unregistered user, causing authentication failures for the associated service." 
+ self.app.payload_script = ( + TARGET_MICROSERVICES / "hotelReservation/wrk2/scripts/hotel-reservation/mixed-workload_type_1.lua" + ) + super().__init__(app=self.app, namespace=self.app.namespace) + # === Attach evaluation oracles === + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + + self.app.create_workload() + self.mitigation_oracle = MitigationOracle(problem=self) + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection ==") + injector = ApplicationFaultInjector(namespace=self.namespace) + injector._inject( + fault_type="storage_user_unregistered", + microservices=[self.faulty_service], + ) + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}\n") + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + injector = ApplicationFaultInjector(namespace=self.namespace) + injector._recover( + fault_type="storage_user_unregistered", + microservices=[self.faulty_service], + ) + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}\n") diff --git a/sregym/conductor/problems/taint_no_toleration.py b/sregym/conductor/problems/taint_no_toleration.py new file mode 100644 index 0000000..8c79146 --- /dev/null +++ b/sregym/conductor/problems/taint_no_toleration.py @@ -0,0 +1,62 @@ +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.oracles.mitigation import MitigationOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_virtual import VirtualizationFaultInjector +from sregym.service.apps.social_network import SocialNetwork +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class TaintNoToleration(Problem): + def __init__(self): + self.app = SocialNetwork() + self.namespace = self.app.namespace + self.kubectl = KubeCtl() + + # ── pick all real worker nodes dynamically ─────────────────────── + self.faulty_nodes = self._pick_worker_nodes() + self.faulty_service = "user-service" + self.root_cause = f"Worker nodes are tainted with sre-fault=blocked:NoSchedule, but the deployment `{self.faulty_service}` has a toleration for a different key (dummy-key), causing pods to be unschedulable and remain in Pending state." 
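+        # The mismatch injected below is, in short:
+        #   node taint:      sre-fault=blocked:NoSchedule
+        #   pod toleration:  key "dummy-key" (does not match the taint)
+        # so rescheduled pods typically stay Pending with events mentioning the untolerated taint.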
+ + super().__init__(app=self.app, namespace=self.namespace) + + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + # TODO: support more precise diagnosis oracle: Nodes or DeploymentConfiguration + + self.app.create_workload() + self.mitigation_oracle = MitigationOracle(problem=self) + + self.injector = VirtualizationFaultInjector(namespace=self.namespace) + + def _pick_worker_nodes(self) -> list[str]: + """Return the names of all nodes that are *not* control-plane.""" + nodes = self.kubectl.core_v1_api.list_node().items + worker_names = [] + for n in nodes: + labels = n.metadata.labels or {} + if "node-role.kubernetes.io/control-plane" not in labels: + worker_names.append(n.metadata.name) + if not worker_names: + # fallback to first node if somehow all are control-plane + return [nodes[0].metadata.name] + return worker_names + + @mark_fault_injected + def inject_fault(self): + print(f"Injecting Fault to Service {self.faulty_service} on Nodes {self.faulty_nodes}") + for node in self.faulty_nodes: + self.kubectl.exec_command(f"kubectl taint node {node} sre-fault=blocked:NoSchedule --overwrite") + + patch = """[{"op": "add", "path": "/spec/template/spec/tolerations", + "value": [{"key": "dummy-key", "operator": "Exists", "effect": "NoSchedule"}]}]""" + self.kubectl.exec_command( + f"kubectl patch deployment {self.faulty_service} -n {self.namespace} --type='json' -p='{patch}'" + ) + self.kubectl.exec_command(f"kubectl delete pod -l app={self.faulty_service} -n {self.namespace}") + + @mark_fault_injected + def recover_fault(self): + print("Fault Recovery") + # assuming recover_toleration_without_matching_taint can accept multiple services and a node list + for node in self.faulty_nodes: + self.injector.recover_toleration_without_matching_taint([self.faulty_service], node_name=node) diff --git a/sregym/conductor/problems/target_port.py b/sregym/conductor/problems/target_port.py new file mode 100644 index 0000000..1e8bee5 --- /dev/null +++ b/sregym/conductor/problems/target_port.py @@ -0,0 +1,44 @@ +"""K8S misconfig fault problem in the SocialNetwork application.""" + +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.oracles.target_port_mitigation import TargetPortMisconfigMitigationOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_virtual import VirtualizationFaultInjector +from sregym.paths import TARGET_MICROSERVICES +from sregym.service.apps.social_network import SocialNetwork +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class K8STargetPortMisconfig(Problem): + def __init__(self, faulty_service="user-service"): + app = SocialNetwork() + super().__init__(app=app, namespace=app.namespace) + + self.faulty_service = faulty_service + self.kubectl = KubeCtl() + self.root_cause = f"The service `{self.faulty_service}` has a misconfigured target port (9999 instead of 9090), causing connection failures." 
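+        # Illustrative only: the injected Service spec carries a wrong backend port, roughly:
+        #   ports:
+        #     - targetPort: 9999   # pods actually listen on 9090
+        # so traffic through the Service fails even though the pods themselves are healthy.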
+ + # === Attach evaluation oracles === + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + + self.app.create_workload() + self.mitigation_oracle = TargetPortMisconfigMitigationOracle(problem=self) + + @mark_fault_injected + def inject_fault(self): + injector = VirtualizationFaultInjector(namespace=self.namespace) + injector._inject( + fault_type="misconfig_k8s", + microservices=[self.faulty_service], + ) + print(f"[FAULT INJECTED] {self.faulty_service} misconfigured") + + @mark_fault_injected + def recover_fault(self): + injector = VirtualizationFaultInjector(namespace=self.namespace) + injector._recover( + fault_type="misconfig_k8s", + microservices=[self.faulty_service], + ) + print(f"[FAULT RECOVERED] {self.faulty_service}") diff --git a/sregym/conductor/problems/train_ticket_f22.py b/sregym/conductor/problems/train_ticket_f22.py new file mode 100644 index 0000000..3a6d833 --- /dev/null +++ b/sregym/conductor/problems/train_ticket_f22.py @@ -0,0 +1,51 @@ +import logging + +from sregym.conductor.oracles.compound import CompoundedOracle +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.oracles.mitigation import MitigationOracle +from sregym.conductor.oracles.workload import WorkloadOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_tt import TrainTicketFaultInjector +from sregym.service.apps.train_ticket import TrainTicket +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + +logger = logging.getLogger(__name__) + + +class TrainTicketF22(Problem): + def __init__(self): + self.app_name = "train-ticket" + self.faulty_service = "ts-contacts-service" + self.fault_name = "fault-22-sql-column-name-mismatch-error" + self.app = TrainTicket() + + super().__init__(app=self.app, namespace=self.app.namespace) + self.root_cause = f"The deployment `{self.faulty_service}` has a SQL column name mismatch error in its database queries, causing database operation failures." 
+ + self.kubectl = KubeCtl() + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + + self.app.create_workload() + self.mitigation_oracle = CompoundedOracle( + self, + WorkloadOracle(problem=self, wrk_manager=self.app.wrk), + ) + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection ==") + self.injector = TrainTicketFaultInjector(namespace=self.namespace) + self.injector._inject( + fault_type="fault-22-sql-column-name-mismatch-error", + ) + print(f"Injected fault-22-sql-column-name-mismatch-error | Namespace: {self.namespace}\n") + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + self.injector = TrainTicketFaultInjector(namespace=self.namespace) + self.injector._recover( + fault_type="fault-22-sql-column-name-mismatch-error", + ) + print(f"Recovered from fault-22-sql-column-name-mismatch-error | Namespace: {self.namespace}\n") diff --git a/sregym/conductor/problems/trainticket_f17.py b/sregym/conductor/problems/trainticket_f17.py new file mode 100644 index 0000000..c42466d --- /dev/null +++ b/sregym/conductor/problems/trainticket_f17.py @@ -0,0 +1,51 @@ +import logging + +from sregym.conductor.oracles.compound import CompoundedOracle +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.oracles.mitigation import MitigationOracle +from sregym.conductor.oracles.workload import WorkloadOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_tt import TrainTicketFaultInjector +from sregym.service.apps.train_ticket import TrainTicket +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + +logger = logging.getLogger(__name__) + + +class TrainTicketF17(Problem): + def __init__(self): + self.app_name = "train-ticket" + self.faulty_service = "ts-voucher-service" + self.fault_name = "fault-17-nested-sql-select-clause-error" + self.app = TrainTicket() + + super().__init__(app=self.app, namespace=self.app.namespace) + self.root_cause = f"The deployment `{self.faulty_service}` has a nested SQL SELECT clause error in its database queries, causing database operation failures." 
+ + self.kubectl = KubeCtl() + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + + self.app.create_workload() + self.mitigation_oracle = CompoundedOracle( + self, + WorkloadOracle(problem=self, wrk_manager=self.app.wrk), + ) + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection ==") + self.injector = TrainTicketFaultInjector(namespace=self.namespace) + self.injector._inject( + fault_type="fault-17-nested-sql-select-clause-error", + ) + print(f"Injected fault-17-nested-sql-select-clause-error | Namespace: {self.namespace}\n") + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + self.injector = TrainTicketFaultInjector(namespace=self.namespace) + self.injector._recover( + fault_type="fault-17-nested-sql-select-clause-error", + ) + print(f"Recovered from fault-17-nested-sql-select-clause-error | Namespace: {self.namespace}\n") diff --git a/sregym/conductor/problems/update_incompatible_correlated.py b/sregym/conductor/problems/update_incompatible_correlated.py new file mode 100644 index 0000000..70be9bb --- /dev/null +++ b/sregym/conductor/problems/update_incompatible_correlated.py @@ -0,0 +1,54 @@ +from sregym.conductor.oracles.incorrect_image_mitigation import IncorrectImageMitigationOracle +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_app import ApplicationFaultInjector +from sregym.service.apps.astronomy_shop import AstronomyShop +from sregym.service.apps.hotel_reservation import HotelReservation +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class UpdateIncompatibleCorrelated(Problem): + def __init__(self): + self.app = HotelReservation() + self.kubectl = KubeCtl() + self.namespace = self.app.namespace + self.faulty_service = [ + "mongodb-geo", + "mongodb-profile", + "mongodb-rate", + "mongodb-recommendation", + "mongodb-reservation", + "mongodb-user", + ] + self.root_cause = "The MongoDB deployments (mongodb-geo, mongodb-profile, mongodb-rate, and mongodb-recommendation) are updated to use an incompatible image version 'mongo:8.0.14-rc0'." 
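+        # Note: all six MongoDB deployments listed above are patched to the same incompatible
+        # image; mongod typically refuses to start on data files written by the much older
+        # 4.4 series, so the affected pods end up in CrashLoopBackOff.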
+ self.injector = ApplicationFaultInjector(namespace=self.namespace) + super().__init__(app=self.app, namespace=self.namespace) + + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + # not really the incorrect image problem, just reuse the incorrect image function + self.mitigation_oracle = IncorrectImageMitigationOracle( + problem=self, actual_images={service: "mongo:8.0.14-rc0" for service in self.faulty_service} + ) + + self.app.create_workload() + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection ==") + # not really the incorrect image problem, just reuse the incorrect image function + for service in self.faulty_service: + self.injector.inject_incorrect_image( + deployment_name=service, namespace=self.namespace, bad_image="mongo:8.0.14-rc0" + ) + print(f"Service: {service} | Namespace: {self.namespace}\n") + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + for service in self.faulty_service: + self.injector.recover_incorrect_image( + deployment_name=service, + namespace=self.namespace, + correct_image="mongo:4.4.6", + ) diff --git a/sregym/conductor/problems/valkey_auth_disruption.py b/sregym/conductor/problems/valkey_auth_disruption.py new file mode 100644 index 0000000..971020e --- /dev/null +++ b/sregym/conductor/problems/valkey_auth_disruption.py @@ -0,0 +1,36 @@ +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.oracles.valkey_auth_mitigation import ValkeyAuthMitigation +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_app import ApplicationFaultInjector +from sregym.paths import TARGET_MICROSERVICES +from sregym.service.apps.astronomy_shop import AstronomyShop +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class ValkeyAuthDisruption(Problem): + def __init__(self): + app = AstronomyShop() + super().__init__(app=app, namespace=app.namespace) + + self.faulty_service = "valkey-cart" + self.kubectl = KubeCtl() + self.root_cause = f"The valkey-cart service has an invalid password configured, causing authentication failures for dependent services." 
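+        # Expected symptom: clients of valkey-cart (e.g. the cart service) fail to
+        # authenticate, typically surfacing WRONGPASS/NOAUTH style errors in their logs.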
+ + # === Attach evaluation oracles === + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + self.mitigation_oracle = ValkeyAuthMitigation(problem=self) + + self.app.create_workload() + + @mark_fault_injected + def inject_fault(self): + injector = ApplicationFaultInjector(namespace=self.namespace) + injector._inject(fault_type="valkey_auth_disruption") + print(f"[FAULT INJECTED] valkey auth disruption") + + @mark_fault_injected + def recover_fault(self): + injector = ApplicationFaultInjector(namespace=self.namespace) + injector._recover(fault_type="valkey_auth_disruption") + print(f"[FAULT INJECTED] valkey auth disruption") diff --git a/sregym/conductor/problems/valkey_memory_disruption.py b/sregym/conductor/problems/valkey_memory_disruption.py new file mode 100644 index 0000000..bf9ee12 --- /dev/null +++ b/sregym/conductor/problems/valkey_memory_disruption.py @@ -0,0 +1,33 @@ +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_app import ApplicationFaultInjector +from sregym.service.apps.astronomy_shop import AstronomyShop +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class ValkeyMemoryDisruption(Problem): + def __init__(self): + app = AstronomyShop() + super().__init__(app=app, namespace=app.namespace) + + self.faulty_service = "valkey-cart" + self.kubectl = KubeCtl() + self.root_cause = "A job is flooding the valkey-cart store with large payloads (10MB each), causing it to enter an out-of-memory (OOM) state." + + # === Attach evaluation oracles === + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + + self.app.create_workload() + + @mark_fault_injected + def inject_fault(self): + injector = ApplicationFaultInjector(namespace=self.namespace) + injector._inject(fault_type="valkey_memory_disruption") + print(f"[FAULT INJECTED] valkey auth disruption") + + @mark_fault_injected + def recover_fault(self): + injector = ApplicationFaultInjector(namespace=self.namespace) + injector._recover(fault_type="valkey_memory_disruption") + print(f"[FAULT INJECTED] valkey memory disruption") diff --git a/sregym/conductor/problems/workload_imbalance.py b/sregym/conductor/problems/workload_imbalance.py new file mode 100644 index 0000000..bf26a63 --- /dev/null +++ b/sregym/conductor/problems/workload_imbalance.py @@ -0,0 +1,58 @@ +import time + +from sregym.conductor.oracles.imbalance_mitigation import ImbalanceMitigationOracle +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_virtual import VirtualizationFaultInjector +from sregym.service.apps.astronomy_shop import AstronomyShop +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class WorkloadImbalance(Problem): + def __init__(self): + self.app = AstronomyShop() + self.kubectl = KubeCtl() + self.namespace = self.app.namespace + self.faulty_service = ["frontend"] + self.injector = VirtualizationFaultInjector(namespace="kube-system") + self.injector_for_scale = VirtualizationFaultInjector(namespace=self.namespace) + self.root_cause = "The kube-proxy daemonset is using a buggy image version, and the frontend deployment is scaled to 5 replicas with a high workload surge, causing workload imbalance across pods." 
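+        # Expected symptom: with the buggy kube-proxy image, traffic is no longer spread
+        # evenly across the 5 frontend replicas; `kubectl top pods -n <namespace>` (if
+        # metrics are available) would typically show one replica far busier than the rest.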
+ super().__init__(app=self.app, namespace=self.namespace) + + # not so precise here by now + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + + self.mitigation_oracle = ImbalanceMitigationOracle(problem=self) + + self.app.create_workload() + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection ==") + self.injector.inject_daemon_set_image_replacement( + daemon_set_name="kube-proxy", new_image="docker.io/jackcuii/kube-proxy:v1.31.12" + ) + print(f"Service: {self.faulty_service[0]} | Namespace: {self.namespace}\n") + self.injector_for_scale.scale_pods_to(replicas=5, microservices=self.faulty_service) + self.kubectl.wait_for_ready(namespace=self.namespace) + # surge the workload + print("== Surge the workload ==") + self.app.wrk.change_users(number=500, namespace=self.namespace) + self.app.wrk.change_spawn_rate(rate=50, namespace=self.namespace) + print("== Wait the workload to be stable ==") + time.sleep(10) + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + self.injector.inject_daemon_set_image_replacement( + daemon_set_name="kube-proxy", new_image="registry.k8s.io/kube-proxy:v1.31.13" + ) + print(f"Service: {self.faulty_service[0]} | Namespace: {self.namespace}\n") + self.injector_for_scale.scale_pods_to(replicas=1, microservices=self.faulty_service) + self.kubectl.wait_for_ready(namespace=self.namespace) + # reduce the workload + print("== Reduce the workload ==") + self.app.wrk.change_users(number=10, namespace=self.namespace) + self.app.wrk.change_spawn_rate(rate=1, namespace=self.namespace) diff --git a/sregym/conductor/problems/wrong_bin_usage.py b/sregym/conductor/problems/wrong_bin_usage.py new file mode 100644 index 0000000..6136daf --- /dev/null +++ b/sregym/conductor/problems/wrong_bin_usage.py @@ -0,0 +1,49 @@ +"""Wrong binary usage problem in the HotelReservation application.""" + +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.oracles.wrong_bin_mitigation import WrongBinMitigationOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_virtual import VirtualizationFaultInjector +from sregym.paths import TARGET_MICROSERVICES +from sregym.service.apps.hotel_reservation import HotelReservation +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class WrongBinUsage(Problem): + def __init__(self, faulty_service: str = "profile"): + self.app = HotelReservation() + self.kubectl = KubeCtl() + self.namespace = self.app.namespace + self.faulty_service = faulty_service + self.root_cause = f"The deployment `{self.faulty_service}` is configured to use the wrong binary (geo instead of profile), causing the service to malfunction." 
+ + self.app.payload_script = ( + TARGET_MICROSERVICES / "hotelReservation/wrk2/scripts/hotel-reservation/mixed-workload_type_1.lua" + ) + super().__init__(app=self.app, namespace=self.app.namespace) + # === Attach evaluation oracles === + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + + self.app.create_workload() + self.mitigation_oracle = WrongBinMitigationOracle(problem=self) + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection ==") + injector = VirtualizationFaultInjector(namespace=self.namespace) + injector._inject( + fault_type="wrong_bin_usage", + microservices=[self.faulty_service], + ) + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}\n") + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + injector = VirtualizationFaultInjector(namespace=self.namespace) + injector._recover( + fault_type="wrong_bin_usage", + microservices=[self.faulty_service], + ) + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}\n") diff --git a/sregym/conductor/problems/wrong_dns_policy.py b/sregym/conductor/problems/wrong_dns_policy.py new file mode 100644 index 0000000..eeaded4 --- /dev/null +++ b/sregym/conductor/problems/wrong_dns_policy.py @@ -0,0 +1,54 @@ +from sregym.conductor.oracles.dns_resolution_mitigation import DNSResolutionMitigationOracle +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_virtual import VirtualizationFaultInjector +from sregym.service.apps.astronomy_shop import AstronomyShop +from sregym.service.apps.hotel_reservation import HotelReservation +from sregym.service.apps.social_network import SocialNetwork +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class WrongDNSPolicy(Problem): + def __init__(self, app_name="astronomy_shop", faulty_service="frontend"): + self.app_name = app_name + self.faulty_service = faulty_service + + if app_name == "social_network": + self.app = SocialNetwork() + elif app_name == "hotel_reservation": + self.app = HotelReservation() + elif app_name == "astronomy_shop": + self.app = AstronomyShop() + else: + raise ValueError(f"Unsupported app name: {app_name}") + + super().__init__(app=self.app, namespace=self.app.namespace) + + self.kubectl = KubeCtl() + self.root_cause = f"The deployment `{self.faulty_service}` has a misconfigured DNS policy (set to None with external nameserver 8.8.8.8), causing DNS resolution failures for cluster-internal services." 
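+        # Illustrative shape of the injected pod spec change (values from the root cause above):
+        #   dnsPolicy: "None"
+        #   dnsConfig:
+        #     nameservers:
+        #       - 8.8.8.8
+        # Cluster-internal names (*.svc.cluster.local) can then no longer be resolved by the pod.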
+ + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + + self.app.create_workload() + self.mitigation_oracle = DNSResolutionMitigationOracle(problem=self) + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection ==") + injector = VirtualizationFaultInjector(namespace=self.namespace) + injector._inject( + fault_type="wrong_dns_policy", + microservices=[self.faulty_service], + ) + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}\n") + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + injector = VirtualizationFaultInjector(namespace=self.namespace) + injector._recover( + fault_type="wrong_dns_policy", + microservices=[self.faulty_service], + ) + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}\n") diff --git a/sregym/conductor/problems/wrong_service_selector.py b/sregym/conductor/problems/wrong_service_selector.py new file mode 100644 index 0000000..b8981ee --- /dev/null +++ b/sregym/conductor/problems/wrong_service_selector.py @@ -0,0 +1,54 @@ +from sregym.conductor.oracles.llm_as_a_judge.llm_as_a_judge_oracle import LLMAsAJudgeOracle +from sregym.conductor.oracles.service_endpoint_mitigation import ServiceEndpointMitigationOracle +from sregym.conductor.problems.base import Problem +from sregym.generators.fault.inject_virtual import VirtualizationFaultInjector +from sregym.service.apps.astronomy_shop import AstronomyShop +from sregym.service.apps.hotel_reservation import HotelReservation +from sregym.service.apps.social_network import SocialNetwork +from sregym.service.kubectl import KubeCtl +from sregym.utils.decorators import mark_fault_injected + + +class WrongServiceSelector(Problem): + def __init__(self, app_name="astronomy_shop", faulty_service="frontend"): + self.app_name = app_name + self.faulty_service = faulty_service + + if app_name == "social_network": + self.app = SocialNetwork() + elif app_name == "hotel_reservation": + self.app = HotelReservation() + elif app_name == "astronomy_shop": + self.app = AstronomyShop() + else: + raise ValueError(f"Unsupported app name: {app_name}") + + super().__init__(app=self.app, namespace=self.app.namespace) + + self.kubectl = KubeCtl() + self.root_cause = f"The service `{self.faulty_service}` has a misconfigured selector that includes an additional incorrect label, preventing it from matching the intended pods." 
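+        # Illustrative only: the injected selector gains an extra label that no pod carries, e.g.:
+        #   selector:
+        #     app: frontend
+        #     sre-fault: "true"   # hypothetical extra label; the actual key is set by the injector
+        # so the Service ends up with no endpoints (`kubectl get endpoints <service>` is empty).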
+ + self.diagnosis_oracle = LLMAsAJudgeOracle(problem=self, expected=self.root_cause) + + self.app.create_workload() + self.mitigation_oracle = ServiceEndpointMitigationOracle(problem=self) + + @mark_fault_injected + def inject_fault(self): + print("== Fault Injection ==") + injector = VirtualizationFaultInjector(namespace=self.namespace) + injector._inject( + fault_type="wrong_service_selector", + microservices=[self.faulty_service], + ) + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}\n") + + @mark_fault_injected + def recover_fault(self): + print("== Fault Recovery ==") + injector = VirtualizationFaultInjector(namespace=self.namespace) + injector._recover( + fault_type="wrong_service_selector", + microservices=[self.faulty_service], + ) + print(f"Service: {self.faulty_service} | Namespace: {self.namespace}\n") diff --git a/sregym/conductor/tasklist.yml.example b/sregym/conductor/tasklist.yml.example new file mode 100644 index 0000000..2ad43f2 --- /dev/null +++ b/sregym/conductor/tasklist.yml.example @@ -0,0 +1,260 @@ +all: + problems: + faulty_image_correlated: + - diagnosis + - mitigation + update_incompatible_correlated: + - diagnosis + - mitigation + incorrect_image: + - diagnosis + - mitigation + incorrect_port_assignment: + - diagnosis + - mitigation + misconfig_app_hotel_res: + - diagnosis + - mitigation + missing_env_variable_astronomy_shop: + - diagnosis + - mitigation + revoke_auth_mongodb-1: + - diagnosis + - mitigation + revoke_auth_mongodb-2: + - diagnosis + - mitigation + storage_user_unregistered-1: + - diagnosis + - mitigation + storage_user_unregistered-2: + - diagnosis + - mitigation + valkey_auth_disruption: + - diagnosis + - mitigation + valkey_memory_disruption: + - diagnosis + - mitigation + capacity_decrease_rpc_retry_storm: + - diagnosis + - mitigation + gc_capacity_degradation: + - diagnosis + - mitigation + load_spike_rpc_retry_storm: + - diagnosis + - mitigation + assign_to_non_existent_node: + - diagnosis + - mitigation + auth_miss_mongodb: + - diagnosis + - mitigation + configmap_drift_hotel_reservation: + - diagnosis + - mitigation + duplicate_pvc_mounts_astronomy_shop: + - diagnosis + - mitigation + duplicate_pvc_mounts_hotel_reservation: + - diagnosis + - mitigation + duplicate_pvc_mounts_social_network: + - diagnosis + - mitigation + env_variable_shadowing_astronomy_shop: + - diagnosis + - mitigation + k8s_target_port-misconfig: + - diagnosis + - mitigation + liveness_probe_misconfiguration_astronomy_shop: + - diagnosis + - mitigation + liveness_probe_misconfiguration_hotel_reservation: + - diagnosis + - mitigation + liveness_probe_misconfiguration_social_network: + - diagnosis + - mitigation + liveness_probe_too_aggressive_astronomy_shop: + - diagnosis + - mitigation + liveness_probe_too_aggressive_hotel_reservation: + - diagnosis + - mitigation + liveness_probe_too_aggressive_social_network: + - diagnosis + - mitigation + missing_configmap_hotel_reservation: + - diagnosis + - mitigation + missing_configmap_social_network: + - diagnosis + - mitigation + missing_service_astronomy_shop: + - diagnosis + - mitigation + missing_service_hotel_reservation: + - diagnosis + - mitigation + missing_service_social_network: + - diagnosis + - mitigation + namespace_memory_limit: + - diagnosis + - mitigation + pod_anti_affinity_deadlock: + - diagnosis + - mitigation + persistent_volume_affinity_violation: + - diagnosis + - mitigation + pvc_claim_mismatch: + - diagnosis + - mitigation + rbac_misconfiguration: + - diagnosis + - mitigation + 
readiness_probe_misconfiguration_astronomy_shop: + - diagnosis + - mitigation + readiness_probe_misconfiguration_hotel_reservation: + - diagnosis + - mitigation + readiness_probe_misconfiguration_social_network: + - diagnosis + - mitigation + resource_request_too_large: + - diagnosis + - mitigation + resource_request_too_small: + - diagnosis + - mitigation + rolling_update_misconfigured_hotel_reservation: + - diagnosis + - mitigation + rolling_update_misconfigured_social_network: + - diagnosis + - mitigation + scale_pod_zero_social_net: + - diagnosis + - mitigation + service_dns_resolution_failure_astronomy_shop: + - diagnosis + - mitigation + service_dns_resolution_failure_social_network: + - diagnosis + - mitigation + sidecar_port_conflict_astronomy_shop: + - diagnosis + - mitigation + sidecar_port_conflict_hotel_reservation: + - diagnosis + - mitigation + sidecar_port_conflict_social_network: + - diagnosis + - mitigation + stale_coredns_config_astronomy_shop: + - diagnosis + - mitigation + stale_coredns_config_social_network: + - diagnosis + - mitigation + taint_no_toleration_social_network: + - diagnosis + - mitigation + wrong_bin_usage: + - diagnosis + - mitigation + wrong_dns_policy_astronomy_shop: + - diagnosis + - mitigation + wrong_dns_policy_hotel_reservation: + - diagnosis + - mitigation + wrong_dns_policy_social_network: + - diagnosis + - mitigation + wrong_service_selector_astronomy_shop: + - diagnosis + - mitigation + wrong_service_selector_hotel_reservation: + - diagnosis + - mitigation + wrong_service_selector_social_network: + - diagnosis + - mitigation + astronomy_shop_ad_service_failure: + - diagnosis + - mitigation + astronomy_shop_ad_service_high_cpu: + - diagnosis + - mitigation + astronomy_shop_ad_service_manual_gc: + - diagnosis + - mitigation + astronomy_shop_cart_service_failure: + - diagnosis + - mitigation + astronomy_shop_ad_service_image_slow_load: + - diagnosis + - mitigation + astronomy_shop_payment_service_failure: + - diagnosis + - mitigation + astronomy_shop_payment_service_unreachable: + - diagnosis + - mitigation + astronomy_shop_product_catalog_service_failure: + - diagnosis + - mitigation + astronomy_shop_recommendation_service_cache_failure: + - diagnosis + - mitigation + kafka_queue_problems_hotel_reservation: + - diagnosis + - mitigation + loadgenerator_flood_homepage: + - diagnosis + - mitigation + trainticket_f17_nested_sql_select_clause_error: + - diagnosis + - mitigation + trainticket_f22_sql_column_name_mismatch_error: + - diagnosis + - mitigation + read_error: + - diagnosis + - mitigation + ingress_misroute: + - diagnosis + - mitigation + network_policy_block: + - diagnosis + - mitigation + social_net_hotel_res_astro_shop_concurrent_failures: + - diagnosis + - mitigation + kubelet_crash: + - diagnosis + - mitigation + workload_imbalance: + - diagnosis + - mitigation + operator_overload_replicas: + - diagnosis + - mitigation + operator_non_existent_storage: + - diagnosis + - mitigation + operator_invalid_affinity_toleration: + - diagnosis + - mitigation + operator_security_context_fault: + - diagnosis + - mitigation + operator_wrong_update_strategy_fault: + - diagnosis + - mitigation diff --git a/sregym/conductor/utils.py b/sregym/conductor/utils.py new file mode 100644 index 0000000..a8912b8 --- /dev/null +++ b/sregym/conductor/utils.py @@ -0,0 +1,4 @@ +def is_ordered_subset(A: list, B: list) -> bool: + """Check if list A is a subset of B and in the same order.""" + it = iter(B) + return all(a in it for a in A) diff --git 
a/sregym/generators/__init__.py b/sregym/generators/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/sregym/generators/fault/__init__.py b/sregym/generators/fault/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/sregym/generators/fault/base.py b/sregym/generators/fault/base.py
new file mode 100644
index 0000000..90e00ae
--- /dev/null
+++ b/sregym/generators/fault/base.py
@@ -0,0 +1,65 @@
+"""Interface for fault injector classes.
+
+NOTE: Each layer of injection, like ApplicationFaultInjector, OSFaultInjector, HardwareFaultInjector, etc.,
+is implemented as a child class of FaultInjector.
+"""
+
+import time
+
+
+class FaultInjector:
+    def __init__(self, testbed):
+        self.testbed = testbed
+
+    # Deprecated method; prefer calling _inject()/_recover() directly.
+    def inject_fault(
+        self,
+        fault_type: str,
+        fault_id: str,
+        start_time: float,
+        end_time: float,
+        microservices: list[str] = None,
+    ):
+        """
+        Base method to inject a fault into the specified microservices.
+
+        Parameters:
+            microservices (list[str]): list of microservices to inject the fault into.
+            fault_type (str): Type of fault to inject.
+            fault_id (str): Unique identifier for the fault.
+            start_time (float): Time to start the fault injection (epoch time).
+            end_time (float): Time to end the fault injection (epoch time).
+        """
+        current_time = time.time()
+        if current_time < start_time:
+            time.sleep(start_time - current_time)
+
+        self._inject(fault_type, microservices)
+
+    def _inject(self, fault_type: str, microservices: list[str] = None, duration: str = None):
+        if duration:
+            self._invoke_method("inject", fault_type, microservices, duration)
+        elif microservices:
+            self._invoke_method("inject", fault_type, microservices)
+        else:
+            self._invoke_method("inject", fault_type)
+        time.sleep(6)
+
+    def _recover(
+        self,
+        fault_type: str,
+        microservices: list[str] = None,
+    ):
+        if microservices and fault_type:
+            self._invoke_method("recover", fault_type, microservices)
+        elif fault_type:
+            self._invoke_method("recover", fault_type)
+
+    def _invoke_method(self, action_prefix, *args):
+        """Helper: dispatch to the inject_<fault_type>/recover_<fault_type> method by name."""
+        method_name = f"{action_prefix}_{args[0]}"
+        method = getattr(self, method_name, None)
+        if method:
+            method(*args[1:])
+        else:
+            print(f"Unknown fault type: {args[0]}")
diff --git a/sregym/generators/fault/custom/slow_service.py b/sregym/generators/fault/custom/slow_service.py
new file mode 100644
index 0000000..16c2489
--- /dev/null
+++ b/sregym/generators/fault/custom/slow_service.py
@@ -0,0 +1,42 @@
+import http.server
+import socketserver
+import time
+import random
+
+
+INIT_SECONDS = 15
+_START = time.time()
+
+
+def _initialized() -> bool:
+    return time.time() - _START >= INIT_SECONDS
+
+
+class MainHandler(http.server.SimpleHTTPRequestHandler):
+
+    def _write(self, status: int, body: str):
+        self.send_response(status)
+        self.end_headers()
+        self.wfile.write(body.encode())
+
+    def do_GET(self):
+        path = self.path.rstrip('/')
+        if path in {"/health", "/ready"}:
+            if _initialized():
+                self._write(200, "OK")
+            else:
+
+                time.sleep(random.uniform(0.05, 0.2))
+                self._write(503, "Initializing")
+        else:
+            self._write(200, "Auxiliary service running")
+
+    def log_message(self, fmt, *args):
+        if self.path not in {"/health", "/ready"}:
+            super().log_message(fmt, *args)
+
+
+if __name__ == "__main__":
+    print("[service] starting on :8080 (init {}s)".format(INIT_SECONDS))
+    with socketserver.TCPServer(("", 8080), MainHandler) as httpd:
+        httpd.serve_forever()
\ No newline at end of file
diff --git
a/sregym/generators/fault/helpers.py b/sregym/generators/fault/helpers.py new file mode 100644 index 0000000..b227e3e --- /dev/null +++ b/sregym/generators/fault/helpers.py @@ -0,0 +1,120 @@ +import subprocess + +# SocialNetwork service process names +sn_svc_process_names = [ + "ComposePostServ", + "HomeTimelineSer", + "MediaService", + "PostStorageServ", + "SocialGraphServ", + "TextService", + "UserService", + "UrlShortenServi", + "UserMentionServ", + "UserTimelineSer", + "UniqueIdService", +] + +# SocialNetwork MongoDB process names +sn_mongod_process_names = ["mongod"] + +# SocialNetwork Redis process names +sn_redis_process_names = ["redis-server"] + +# SocialNetwork Memcached process names +sn_memcached_process_names = ["memcached"] + +# HotelResearvation service process names +hr_svc_process_names = [ + "geo", + "frontend", + "consul", + "profile", + "rate", + "recommendation", + "reservation", + "search", + "user", +] + +# HotelResearvation MongoDB process names +hr_mongod_process_names = ["mongod"] + +# HotelResearvation Memcached process names +hr_memcached_process_names = ["memcached"] + + +def get_pids_by_name_contain(search_term): + """ + Get a list of PIDs for processes whose command contains the given search term. + + :param search_term: The term to search for in process names (case-sensitive). + :return: A list of PIDs (integers) matching the search term. + """ + try: + result = subprocess.run( + ["ps", "-e", "-o", "pid,comm"], + text=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + + if result.returncode != 0: + raise RuntimeError(f"Error running ps command: {result.stderr.strip()}") + + # Filter the output for lines containing the search term + matching_pids = [] + for line in result.stdout.splitlines(): + if search_term in line: + parts = line.split(maxsplit=1) + if parts: # Ensure we have at least one part + pid = parts[0] + if pid.isdigit(): + matching_pids.append(int(pid)) + + return matching_pids + + except Exception as e: + print(f"Error: {e}") + return [] + + +def get_pids_by_name(search_term): + """ + Get a list of PIDs for processes whose command exactly match the given search term. + + :param search_term: The term to search for in process names (case-sensitive). + :return: A list of PIDs (integers) matching exactly the search term. 
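+
+    Example (illustrative; "mongod" is one of the process names listed at the top of
+    this module, and the returned PIDs are made up):
+
+        pids = get_pids_by_name("mongod")  # e.g. [1234, 5678]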
+ """ + try: + result = subprocess.run( + ["ps", "-e", "-o", "pid,comm"], + text=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + + if result.returncode != 0: + raise RuntimeError(f"Error running ps command: {result.stderr.strip()}") + + # Filter the output for lines containing the search term + matching_pids = [] + for line in result.stdout.splitlines(): + parts = line.split(maxsplit=1) + if len(parts) == 2: # Ensure we have both PID and command + pid, command = parts + if command == search_term: # Exact match check + if pid.isdigit(): + matching_pids.append(int(pid)) + + return matching_pids + + except Exception as e: + print(f"Error: {e}") + return [] + + +if __name__ == "__main__": + search_term = "HomeTimelineSer" + pids = get_pids_by_name(search_term) + print(f"Processes with '{search_term}' in their name: {pids}") diff --git a/sregym/generators/fault/inject_app.py b/sregym/generators/fault/inject_app.py new file mode 100644 index 0000000..ce119d2 --- /dev/null +++ b/sregym/generators/fault/inject_app.py @@ -0,0 +1,472 @@ +"""Inject faults at the application layer: Code, MongoDB, Redis, etc.""" + +import base64 +import textwrap +import time + +from kubernetes import client + +from sregym.generators.fault.base import FaultInjector +from sregym.service.kubectl import KubeCtl + + +class ApplicationFaultInjector(FaultInjector): + def __init__(self, namespace: str): + self.namespace = namespace + self.kubectl = KubeCtl() + self.mongo_service_pod_map = {"mongodb-rate": "rate", "mongodb-geo": "geo"} + + def delete_service_pods(self, target_service_pods: list[str]): + """Kill the corresponding service pod to enforce the fault.""" + for pod in target_service_pods: + delete_pod_command = f"kubectl delete pod {pod} -n {self.namespace}" + delete_result = self.kubectl.exec_command(delete_pod_command) + print(f"Deleted service pod {pod} to enforce the fault: {delete_result}") + + ############# FAULT LIBRARY ################ + # A.1 - revoke_auth: Revoke admin privileges in MongoDB - Auth + def inject_revoke_auth(self, microservices: list[str]): + """Inject a fault to revoke admin privileges in MongoDB.""" + print(f"Microservices to inject: {microservices}") + target_services = ["mongodb-rate", "mongodb-geo"] + for service in target_services: + if service in microservices: + pods = self.kubectl.list_pods(self.namespace) + # print(pods) + target_mongo_pods = [pod.metadata.name for pod in pods.items if service in pod.metadata.name] + print(f"Target MongoDB Pods: {target_mongo_pods}") + + # Find the corresponding service pod + target_service_pods = [ + pod.metadata.name + for pod in pods.items + if self.mongo_service_pod_map[service] in pod.metadata.name and "mongodb-" not in pod.metadata.name + ] + print(f"Target Service Pods: {target_service_pods}") + + for pod in target_mongo_pods: + if service == "mongodb-rate": + revoke_command = f"kubectl exec -it {pod} -n {self.namespace} -- /bin/bash /scripts/revoke-admin-rate-mongo.sh" + elif service == "mongodb-geo": + revoke_command = f"kubectl exec -it {pod} -n {self.namespace} -- /bin/bash /scripts/revoke-admin-geo-mongo.sh" + result = self.kubectl.exec_command(revoke_command) + print(f"Injection result for {service}: {result}") + + self.delete_service_pods(target_service_pods) + time.sleep(3) + + def recover_revoke_auth(self, microservices: list[str]): + target_services = ["mongodb-rate", "mongodb-geo"] + for service in target_services: + print(f"Microservices to recover: {microservices}") + if service in microservices: + pods = 
self.kubectl.list_pods(self.namespace) + target_mongo_pods = [pod.metadata.name for pod in pods.items if service in pod.metadata.name] + print(f"Target MongoDB Pods for recovery: {target_mongo_pods}") + + # Find the corresponding service pod + target_service_pods = [ + pod.metadata.name for pod in pods.items if self.mongo_service_pod_map[service] in pod.metadata.name + ] + for pod in target_mongo_pods: + if service == "mongodb-rate": + recover_command = f"kubectl exec -it {pod} -n {self.namespace} -- /bin/bash /scripts/revoke-mitigate-admin-rate-mongo.sh" + elif service == "mongodb-geo": + recover_command = f"kubectl exec -it {pod} -n {self.namespace} -- /bin/bash /scripts/revoke-mitigate-admin-geo-mongo.sh" + result = self.kubectl.exec_command(recover_command) + print(f"Recovery result for {service}: {result}") + + self.delete_service_pods(target_service_pods) + + # A.2 - storage_user_unregistered: User not registered in MongoDB - Storage/Net + def inject_storage_user_unregistered(self, microservices: list[str]): + """Inject a fault to create an unregistered user in MongoDB.""" + target_services = ["mongodb-rate", "mongodb-geo"] + for service in target_services: + if service in microservices: + pods = self.kubectl.list_pods(self.namespace) + target_mongo_pods = [pod.metadata.name for pod in pods.items if service in pod.metadata.name] + print(f"Target MongoDB Pods: {target_mongo_pods}") + + target_service_pods = [ + pod.metadata.name + for pod in pods.items + if pod.metadata.name.startswith(self.mongo_service_pod_map[service]) + ] + for pod in target_mongo_pods: + revoke_command = ( + f"kubectl exec -it {pod} -n {self.namespace} -- /bin/bash /scripts/remove-admin-mongo.sh" + ) + result = self.kubectl.exec_command(revoke_command) + print(f"Injection result for {service}: {result}") + + self.delete_service_pods(target_service_pods) + + def recover_storage_user_unregistered(self, microservices: list[str]): + target_services = ["mongodb-rate", "mongodb-geo"] + for service in target_services: + if service in microservices: + pods = self.kubectl.list_pods(self.namespace) + target_mongo_pods = [pod.metadata.name for pod in pods.items if service in pod.metadata.name] + print(f"Target MongoDB Pods: {target_mongo_pods}") + + target_service_pods = [ + pod.metadata.name + for pod in pods.items + if pod.metadata.name.startswith(self.mongo_service_pod_map[service]) + ] + for pod in target_mongo_pods: + if service == "mongodb-rate": + revoke_command = f"kubectl exec -it {pod} -n {self.namespace} -- /bin/bash /scripts/remove-mitigate-admin-rate-mongo.sh" + elif service == "mongodb-geo": + revoke_command = f"kubectl exec -it {pod} -n {self.namespace} -- /bin/bash /scripts/remove-mitigate-admin-geo-mongo.sh" + result = self.kubectl.exec_command(revoke_command) + print(f"Recovery result for {service}: {result}") + + self.delete_service_pods(target_service_pods) + + # A.3 - misconfig_app: pull the buggy config of the application image - Misconfig + def inject_misconfig_app(self, microservices: list[str]): + """Inject a fault by pulling a buggy config of the application image. + + NOTE: currently only the geo microservice has a buggy image. 
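+
+        Example (illustrative sketch; the namespace and service follow the __main__
+        block at the bottom of this file):
+
+            injector = ApplicationFaultInjector(namespace="hotel-reservation")
+            injector.inject_misconfig_app(["geo"])
+            # later: injector.recover_misconfig_app(["geo"])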
+ """ + for service in microservices: + # Get the deployment associated with the service + deployment = self.kubectl.get_deployment(service, self.namespace) + if deployment: + # Modify the image to use the buggy image + for container in deployment.spec.template.spec.containers: + if container.name == f"hotel-reserv-{service}": + container.image = "yinfangchen/geo:app3" + self.kubectl.update_deployment(service, self.namespace, deployment) + time.sleep(10) + + def recover_misconfig_app(self, microservices: list[str]): + for service in microservices: + deployment = self.kubectl.get_deployment(service, self.namespace) + if deployment: + for container in deployment.spec.template.spec.containers: + if container.name == f"hotel-reserv-{service}": + container.image = f"yinfangchen/hotelreservation:latest" + self.kubectl.update_deployment(service, self.namespace, deployment) + + # A.4 valkey_auth_disruption: Invalidate the password in valkey so dependent services cannot work + def inject_valkey_auth_disruption(self, target_service="cart"): + pods = self.kubectl.list_pods(self.namespace) + valkey_pods = [p.metadata.name for p in pods.items if "valkey-cart" in p.metadata.name] + if not valkey_pods: + print("[❌] No Valkey pod found!") + return + + valkey_pod = valkey_pods[0] + print(f"[🔐] Found Valkey pod: {valkey_pod}") + command = f"kubectl exec -n {self.namespace} {valkey_pod} -- valkey-cli CONFIG SET requirepass 'invalid_pass'" + result = self.kubectl.exec_command(command) + print(f"[⚠️] Injection result: {result}") + + # Restart cartservice to force it to re-authenticate + self.kubectl.exec_command(f"kubectl delete pod -l app.kubernetes.io/name={target_service} -n {self.namespace}") + time.sleep(3) + + def recover_valkey_auth_disruption(self, target_service="cart"): + pods = self.kubectl.list_pods(self.namespace) + valkey_pods = [p.metadata.name for p in pods.items if "valkey-cart" in p.metadata.name] + if not valkey_pods: + print("[❌] No Valkey pod found for recovery!") + return + + valkey_pod = valkey_pods[0] + print(f"[🔓] Found Valkey pod: {valkey_pod}") + command = f"kubectl exec -n {self.namespace} {valkey_pod} -- valkey-cli CONFIG SET requirepass ''" + result = self.kubectl.exec_command(command) + print(f"[✅] Recovery result: {result}") + + # Restart cartservice to restore normal behavior + self.kubectl.exec_command(f"kubectl delete pod -l app.kubernetes.io/name={target_service} -n {self.namespace}") + time.sleep(3) + + # A.5 valkey_memory disruption: Write large 10MB payloads to the valkey store making it go into OOM state + def inject_valkey_memory_disruption(self): + print("Injecting Valkey memory disruption via in-cluster job...") + + script = textwrap.dedent( + """ + import redis + import threading + import time + + def flood_redis(): + client = redis.Redis(host='valkey-cart', port=6379) + while True: + try: + payload = 'x' * 1000000 + client.set(f"key_{time.time()}", payload) + except Exception as e: + print(f"Error: {e}") + time.sleep(1) + + threads = [] + for _ in range(10): + t = threading.Thread(target=flood_redis) + t.start() + threads.append(t) + + for t in threads: + t.join() + """ + ).strip() + + encoded_script = base64.b64encode(script.encode()).decode() + + job_spec = { + "apiVersion": "batch/v1", + "kind": "Job", + "metadata": { + "name": "valkey-memory-flood", + "namespace": self.namespace, + }, + "spec": { + "template": { + "spec": { + "restartPolicy": "Never", + "containers": [ + { + "name": "flooder", + "image": "python:3.10-slim", + "command": [ + "sh", + "-c", + f"pip 
install redis && python3 -c \"import base64; exec(base64.b64decode('{encoded_script}'))\"", + ], + } + ], + } + } + }, + } + + batch_v1 = client.BatchV1Api() + batch_v1.create_namespaced_job(namespace=self.namespace, body=job_spec) + print("Valkey memory flood job submitted.") + + def recover_valkey_memory_disruption(self): + print("Cleaning up Valkey memory flood job...") + batch_v1 = client.BatchV1Api() + try: + batch_v1.delete_namespaced_job( + name="valkey-memory-flood", + namespace=self.namespace, + propagation_policy="Foreground", + ) + print("Job deleted.") + except Exception as e: + print(f"Error deleting job: {e}") + + # A.5 incorrect_port_assignment: Update an env var to use the wrong port value + def inject_incorrect_port_assignment( + self, deployment_name: str, component_label: str, env_var: str, incorrect_port: str = "8082" + ): + """ + Patch the deployment to modify a specific environment variable (e.g., PRODUCT_CATALOG_SERVICE_ADDR) + to an incorrect port (e.g., 8082 instead of 8080). + """ + # Fetch current deployment + deployment = self.kubectl.get_deployment(deployment_name, self.namespace) + container = deployment.spec.template.spec.containers[0] + container_name = container.name + current_env = container.env + + # Modify the target env var + updated_env = [] + found = False + for e in current_env: + if e.name == env_var: + updated_env.append(client.V1EnvVar(name=env_var, value=f"{e.value.split(':')[0]}:{incorrect_port}")) + found = True + else: + updated_env.append(e) + + if not found: + raise ValueError(f"Environment variable '{env_var}' not found in deployment '{deployment_name}'") + + # Create patch body + patch_body = { + "spec": { + "template": { + "spec": { + "containers": [ + { + "name": container_name, + "env": [{"name": var.name, "value": var.value} for var in updated_env], + } + ] + } + } + } + } + + self.kubectl.patch_deployment(deployment_name, self.namespace, patch_body) + print(f"Injected incorrect port assignment in {env_var} of {deployment_name}.") + + def recover_incorrect_port_assignment(self, deployment_name: str, env_var: str, correct_port: str = "8080"): + """ + Revert the previously patched environment variable (e.g., PRODUCT_CATALOG_SERVICE_ADDR) + to use the correct port (e.g., 8080). 
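+
+        Example (illustrative; the deployment name is hypothetical, and ``injector``
+        is an ApplicationFaultInjector instance):
+
+            injector.recover_incorrect_port_assignment(
+                deployment_name="frontend",
+                env_var="PRODUCT_CATALOG_SERVICE_ADDR",
+                correct_port="8080",
+            )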
+ """ + # Fetch current deployment + deployment = self.kubectl.get_deployment(deployment_name, self.namespace) + container = deployment.spec.template.spec.containers[0] + container_name = container.name + current_env = container.env + + # Revert the target env var + updated_env = [] + found = False + for e in current_env: + if e.name == env_var: + base_host = e.value.split(":")[0] + updated_env.append(client.V1EnvVar(name=env_var, value=f"{base_host}:{correct_port}")) + found = True + else: + updated_env.append(e) + + if not found: + raise ValueError(f"Environment variable '{env_var}' not found in deployment '{deployment_name}'") + + # Create patch body + patch_body = { + "spec": { + "template": { + "spec": { + "containers": [ + { + "name": container_name, + "env": [{"name": var.name, "value": var.value} for var in updated_env], + } + ] + } + } + } + } + + self.kubectl.patch_deployment(deployment_name, self.namespace, patch_body) + print(f"Recovered {env_var} in {deployment_name} to use port {correct_port}.") + + # A.6 incorrect_image: checkout service is updated to use a bad image + def inject_incorrect_image(self, deployment_name: str, namespace: str, bad_image: str = "app-image:latest"): + # Get current deployment for container name + deployment = self.kubectl.get_deployment(deployment_name, namespace) + container_name = deployment.spec.template.spec.containers[0].name + # Set replicas to 0 before updating image + self.kubectl.patch_deployment(name=deployment_name, namespace=namespace, patch_body={"spec": {"replicas": 0}}) + + # Patch image + self.kubectl.patch_deployment( + name=deployment_name, + namespace=namespace, + patch_body={"spec": {"template": {"spec": {"containers": [{"name": container_name, "image": bad_image}]}}}}, + ) + + # Restore replicas to 1 + self.kubectl.patch_deployment(name=deployment_name, namespace=namespace, patch_body={"spec": {"replicas": 1}}) + + def recover_incorrect_image(self, deployment_name: str, namespace: str, correct_image: str): + deployment = self.kubectl.get_deployment(deployment_name, namespace) + container_name = deployment.spec.template.spec.containers[0].name + + self.kubectl.patch_deployment( + name=deployment_name, + namespace=namespace, + patch_body={ + "spec": {"template": {"spec": {"containers": [{"name": container_name, "image": correct_image}]}}} + }, + ) + + + def inject_missing_env_variable(self, deployment_name: str, env_var: str): + """ + Patch the deployment to delete a specific environment variable. 
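+
+        Example (illustrative; the deployment and variable names are hypothetical):
+
+            injector.inject_missing_env_variable(
+                deployment_name="recommendationservice",
+                env_var="PRODUCT_CATALOG_SERVICE_ADDR",
+            )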
+ """ + # Fetch current deployment + try: + deployment = self.kubectl.get_deployment(deployment_name, self.namespace) + container = deployment.spec.template.spec.containers[0] + current_env = container.env + except Exception as e: + raise ValueError(f"Failed to get deployment '{deployment_name}': {e}") + + # Remove the target env var + updated_env = [] + found = False + for e in current_env: + if e.name == env_var: + found = True + # Skip this environment variable (delete it) + continue + else: + updated_env.append(e) + + if not found: + raise ValueError(f"Environment variable '{env_var}' not found in deployment '{deployment_name}'") + + # Update the container's env list + container.env = updated_env + + # Use update_deployment instead of patch_deployment + self.kubectl.update_deployment(deployment_name, self.namespace, deployment) + print(f"Deleted environment variable '{env_var}' from deployment '{deployment_name}'.") + + def recover_missing_env_variable(self, deployment_name: str, env_var: str, env_value: str): + """ + Restore the previously deleted environment variable. + """ + # Fetch current deployment + try: + deployment = self.kubectl.get_deployment(deployment_name, self.namespace) + container = deployment.spec.template.spec.containers[0] + container_name = container.name + current_env = container.env + except Exception as e: + raise ValueError(f"Failed to get deployment '{deployment_name}': {e}") + + # Check if env var already exists + for e in current_env: + if e.name == env_var: + print(f"Environment variable '{env_var}' already exists in deployment '{deployment_name}'.") + return + + # Add the environment variable back + updated_env = list(current_env) + updated_env.append(client.V1EnvVar(name=env_var, value=env_value)) + + # Create patch body + patch_body = { + "spec": { + "template": { + "spec": { + "containers": [ + { + "name": container_name, + "env": [{"name": var.name, "value": var.value} for var in updated_env], + } + ] + } + } + } + } + + self.kubectl.patch_deployment(deployment_name, self.namespace, patch_body) + print(f"Restored environment variable '{env_var}' with value '{env_value}' to deployment '{deployment_name}'.") + + +if __name__ == "__main__": + namespace = "hotel-reservation" + # microservices = ["geo"] + microservices = ["mongodb-geo"] + # fault_type = "misconfig_app" + fault_type = "storage_user_unregistered" + print("Start injection/recover ...") + injector = ApplicationFaultInjector(namespace) + # injector._inject(fault_type, microservices) + injector._recover(fault_type, microservices) diff --git a/sregym/generators/fault/inject_hw.py b/sregym/generators/fault/inject_hw.py new file mode 100644 index 0000000..7a18b09 --- /dev/null +++ b/sregym/generators/fault/inject_hw.py @@ -0,0 +1,294 @@ +import json +import shlex +import subprocess +from typing import List, Tuple + +from sregym.generators.fault.base import FaultInjector +from sregym.service.kubectl import KubeCtl + + +class HWFaultInjector(FaultInjector): + """ + Fault injector that calls the Khaos DaemonSet to inject syscall-level faults + against *host* PIDs corresponding to workload pods. 
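+
+    Example (illustrative sketch; the pod name is hypothetical and the fault type
+    must be one understood by the Khaos binary):
+
+        injector = HWFaultInjector()
+        injector.inject(["hotel-reservation/geo-5c9f7d7b6-abcde"], fault_type="io_delay")
+        ...
+        injector.recover(["hotel-reservation/geo-5c9f7d7b6-abcde"], fault_type="io_delay")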
+ """ + + def __init__(self, khaos_namespace: str = "khaos", khaos_label: str = "app=khaos"): + self.kubectl = KubeCtl() + self.khaos_ns = khaos_namespace + self.khaos_daemonset_label = khaos_label + + def inject(self, microservices: List[str], fault_type: str): + for pod_ref in microservices: + ns, pod = self._split_ns_pod(pod_ref) + node = self._get_pod_node(ns, pod) + container_id = self._get_container_id(ns, pod) + host_pid = self._get_host_pid_on_node(node, container_id) + self._exec_khaos_fault_on_node(node, fault_type, host_pid) + + def inject_node(self, namespace: str, fault_type: str, target_node: str = None): + + if target_node: + selected_node = self._find_node_starting_with(target_node) + if not selected_node: + print(f"Node starting with '{target_node}' not found, selecting node with most pods") + selected_node = self._find_node_with_most_pods(namespace) + else: + selected_node = self._find_node_with_most_pods(namespace) + + print(f"Selected target node: {selected_node}") + + target_pods = self._get_pods_on_node(namespace, selected_node) + if not target_pods: + raise RuntimeError(f"No running pods found on node '{selected_node}' in namespace '{namespace}'") + + print(f"Found {len(target_pods)} pods on node {selected_node}: {', '.join(target_pods)}") + + self.inject(target_pods, fault_type) + return selected_node + + def recover_node(self, namespace: str, fault_type: str, target_node: str): + target_pods = self._get_pods_on_node(namespace, target_node) + if not target_pods: + print(f"[warn] No pods found on node {target_node}; attempting best-effort recovery.") + target_pods = [] + + self.recover(target_pods, fault_type) + + def recover(self, microservices: List[str], fault_type: str): + touched = set() + for pod_ref in microservices: + ns, pod = self._split_ns_pod(pod_ref) + node = self._get_pod_node(ns, pod) + if node in touched: + continue + self._exec_khaos_recover_on_node(node, fault_type) + touched.add(node) + + def _split_ns_pod(self, ref: str) -> Tuple[str, str]: + if "/" in ref: + ns, pod = ref.split("/", 1) + else: + ns, pod = "default", ref + return ns, pod + + def _jsonpath(self, ns: str, pod: str, path: str) -> str: + cmd = f"kubectl -n {shlex.quote(ns)} get pod {shlex.quote(pod)} -o jsonpath='{path}'" + out = self.kubectl.exec_command(cmd) + if isinstance(out, tuple): + out = out[0] + return (out or "").strip() + + def _get_pod_node(self, ns: str, pod: str) -> str: + node = self._jsonpath(ns, pod, "{.spec.nodeName}") + if not node: + raise RuntimeError(f"Pod {ns}/{pod} has no nodeName") + return node + + def _get_container_id(self, ns: str, pod: str) -> str: + # running container first + cid = self._jsonpath(ns, pod, "{.status.containerStatuses[0].containerID}") + if not cid: + cid = self._jsonpath(ns, pod, "{.status.initContainerStatuses[0].containerID}") + if not cid: + raise RuntimeError(f"Pod {ns}/{pod} has no containerID yet (not running?)") + if "://" in cid: + cid = cid.split("://", 1)[1] + return cid + + def _get_khaos_pod_on_node(self, node: str) -> str: + cmd = f"kubectl -n {shlex.quote(self.khaos_ns)} get pods -l {shlex.quote(self.khaos_daemonset_label)} -o json" + out = self.kubectl.exec_command(cmd) + if isinstance(out, tuple): + out = out[0] + data = json.loads(out or "{}") + for item in data.get("items", []): + if item.get("spec", {}).get("nodeName") == node and item.get("status", {}).get("phase") == "Running": + return item["metadata"]["name"] + raise RuntimeError(f"No running Khaos DS pod found on node {node}") + + def _get_host_pid_on_node(self, 
node: str, container_id: str) -> int: + pod_name = self._get_khaos_pod_on_node(node) + + # /proc scan (fast, works with hostPID:true) + try: + return self._get_host_pid_via_proc(pod_name, container_id) + except Exception: + pass + + # cgroup.procs search (works for both cgroup v1/v2) + try: + return self._get_host_pid_via_cgroups(pod_name, container_id) + except Exception: + pass + + raise RuntimeError( + f"Failed to resolve host PID for container {container_id} on node {node} (proc, cgroups, cri all failed)" + ) + + def _get_host_pid_via_proc(self, khaos_pod: str, container_id: str) -> int: + """ + Search host /proc/*/cgroup for the container ID and return the first PID. + With hostPID:true, /proc is the host's proc. + """ + short = shlex.quote(container_id[:12]) + cmd = [ + "kubectl", + "-n", + self.khaos_ns, + "exec", + khaos_pod, + "--", + "sh", + "-lc", + # grep cgroup entries for the container id; extract pid from path + f"grep -l {short} /proc/*/cgroup 2>/dev/null | sed -n 's#.*/proc/\\([0-9]\\+\\)/cgroup#\\1#p' | head -n1", + ] + pid_txt = subprocess.check_output(cmd, stderr=subprocess.DEVNULL, text=True).strip() + if pid_txt.isdigit(): + return int(pid_txt) + + # Try full ID if short didn't match + fullq = shlex.quote(container_id) + cmd[-1] = "sh -lc " + shlex.quote( + f"grep -l {fullq} /proc/*/cgroup 2>/dev/null | sed -n 's#.*/proc/\\([0-9]\\+\\)/cgroup#\\1#p' | head -n1" + ) + pid_txt = subprocess.check_output(cmd, stderr=subprocess.DEVNULL, text=True).strip() + if pid_txt.isdigit(): + return int(pid_txt) + + raise RuntimeError("proc scan found no matching PID") + + def _detect_cgroup_root(self, khaos_pod: str) -> str: + """ + Detect cgroup mount root (v2 unified vs v1). Returns a path under which cgroup.procs exists. + """ + candidates = [ + "/sys/fs/cgroup", # cgroup v2 (unified) + "/sys/fs/cgroup/systemd", # v1 systemd hierarchy + "/sys/fs/cgroup/memory", # v1 memory hierarchy + "/sys/fs/cgroup/pids", # v1 pids hierarchy + ] + for root in candidates: + cmd = [ + "kubectl", + "-n", + self.khaos_ns, + "exec", + khaos_pod, + "--", + "sh", + "-lc", + f"test -d {shlex.quote(root)} && echo OK || true", + ] + out = subprocess.check_output(cmd, stderr=subprocess.DEVNULL, text=True).strip() + if out == "OK": + return root + return "/sys/fs/cgroup" + + def _get_host_pid_via_cgroups(self, khaos_pod: str, container_id: str) -> int: + """ + Search cgroup.procs files whose path contains the container ID; return a PID from that file. + Works for both cgroup v1 and v2. 
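+
+        Illustrative path that such a search would match (the exact layout varies
+        with the container runtime and cgroup version):
+
+            /sys/fs/cgroup/kubepods.slice/.../cri-containerd-<container_id>.scope/cgroup.procs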
+ """ + root = self._detect_cgroup_root(khaos_pod) + short = shlex.quote(container_id[:12]) + cmd = [ + "kubectl", + "-n", + self.khaos_ns, + "exec", + khaos_pod, + "--", + "sh", + "-lc", + # find a cgroup.procs in any directory name/path that includes the short id; print first PID in that procs file + f"find {shlex.quote(root)} -type f -name cgroup.procs -path '*{short}*' 2>/dev/null | head -n1 | xargs -r head -n1", + ] + pid_txt = subprocess.check_output(cmd, stderr=subprocess.DEVNULL, text=True).strip() + if pid_txt.isdigit(): + return int(pid_txt) + + # Try with full ID + fullq = shlex.quote(container_id) + cmd[-1] = "sh -lc " + shlex.quote( + f"find {root} -type f -name cgroup.procs -path '*{fullq}*' 2>/dev/null | head -n1 | xargs -r head -n1" + ) + pid_txt = subprocess.check_output(cmd, stderr=subprocess.DEVNULL, text=True).strip() + if pid_txt.isdigit(): + return int(pid_txt) + + raise RuntimeError("cgroup search found no matching PID") + + def _exec_khaos_fault_on_node(self, node: str, fault_type: str, host_pid: int): + pod_name = self._get_khaos_pod_on_node(node) + cmd = ["kubectl", "-n", self.khaos_ns, "exec", pod_name, "--", "/khaos/khaos", fault_type, str(host_pid)] + subprocess.run(cmd, check=True) + + def _exec_khaos_recover_on_node(self, node: str, fault_type: str): + pod_name = self._get_khaos_pod_on_node(node) + cmd = ["kubectl", "-n", self.khaos_ns, "exec", pod_name, "--", "/khaos/khaos", "--recover", fault_type] + subprocess.run(cmd, check=True) + + def _get_all_nodes(self) -> List[str]: + """Get all node names in the cluster.""" + cmd = "kubectl get nodes -o jsonpath='{.items[*].metadata.name}'" + out = self.kubectl.exec_command(cmd) + if isinstance(out, tuple): + out = out[0] + nodes = (out or "").strip().split() + return [node for node in nodes if node] + + def _find_node_starting_with(self, target_node: str) -> str: + """Find a node that starts with the given string.""" + all_nodes = self._get_all_nodes() + for node in all_nodes: + if node.startswith(target_node): + return node + return None + + def _find_node_with_most_pods(self, namespace: str) -> str: + """Find the node with the most pods in the namespace.""" + node_pod_count = {} + + cmd = f"kubectl -n {namespace} get pods -o json" + out = self.kubectl.exec_command(cmd) + if isinstance(out, tuple): + out = out[0] + try: + data = json.loads(out) + for item in data.get("items", []): + phase = item.get("status", {}).get("phase") + node_name = item.get("spec", {}).get("nodeName") + if phase == "Running" and node_name: + node_pod_count[node_name] = node_pod_count.get(node_name, 0) + 1 + except Exception as e: + print(f"Error getting pods: {e}") + return None + + if not node_pod_count: + raise RuntimeError(f"No running pods found in namespace '{namespace}'") + + selected_node = max(node_pod_count, key=node_pod_count.get) + print(f"Node {selected_node} has {node_pod_count[selected_node]} pods") + return selected_node + + def _get_pods_on_node(self, namespace: str, target_node: str) -> List[str]: + """Get all pods in namespace on the target node.""" + pods: List[str] = [] + + cmd = f"kubectl -n {namespace} get pods -o json" + out = self.kubectl.exec_command(cmd) + if isinstance(out, tuple): + out = out[0] + try: + data = json.loads(out) + for item in data.get("items", []): + phase = item.get("status", {}).get("phase") + node_name = item.get("spec", {}).get("nodeName") + if phase == "Running" and node_name == target_node: + pods.append(f"{namespace}/{item['metadata']['name']}") + except Exception as e: + print(f"Error 
getting pods: {e}") + + return pods diff --git a/sregym/generators/fault/inject_kernel.py b/sregym/generators/fault/inject_kernel.py new file mode 100644 index 0000000..f3379c6 --- /dev/null +++ b/sregym/generators/fault/inject_kernel.py @@ -0,0 +1,776 @@ +import json +import shlex +import subprocess +from typing import Dict, Iterable, Optional, Tuple + +from sregym.service.kubectl import KubeCtl + +# Constants +DEBUGFS_ROOT = "/sys/kernel/debug" +DEFAULT_KHAOS_NS = "khaos" +DEFAULT_KHAOS_LABEL = "app=khaos" +DEFAULT_LOOP_IMAGE = "/var/tmp/khaos-fault.img" +DEFAULT_DM_FLAKEY_NAME = "khaos_flakey0" +DEFAULT_DM_DUST_NAME = "khaos_dust1" +DEFAULT_DM_LSE_NAME = "khaos_lse" +DEFAULT_BLOCK_SIZE = 512 +DEFAULT_SIZE_GB = 5 +PVC_BOUND_TIMEOUT = "60s" + +# Supported fault capability directories under debugfs +FAULT_CAPS = { + "failslab": f"{DEBUGFS_ROOT}/failslab", + "fail_page_alloc": f"{DEBUGFS_ROOT}/fail_page_alloc", + "fail_futex": f"{DEBUGFS_ROOT}/fail_futex", + "fail_make_request": f"{DEBUGFS_ROOT}/fail_make_request", + "fail_function": f"{DEBUGFS_ROOT}/fail_function", + # add more if you enable them on your kernel (e.g., NVMe fault injectors) +} + + +class KernelInjector: + """ + Control Linux kernel fault-injection infrastructure via debugfs from a Khaos DaemonSet pod. + + Typical use: + kf = KernelInjector(kubectl, khaos_ns="khaos", khaos_label="app=khaos") + kf.enable_fault(node="nodeX", cap="fail_page_alloc", probability=5, interval=1, times=-1) + kf.set_task_filter_pids(node="nodeX", pids=[1234, 5678], enabled=True) # scope to those PIDs + ... + kf.disable_fault(node="nodeX", cap="fail_page_alloc") + kf.set_task_filter_pids(node="nodeX", pids=[1234, 5678], enabled=False) + + You can also inject function-specific errors: + kf.fail_function_add(node, func="open_ctree", retval=-12) + kf.fail_function_clear(node) + + And systematic "Nth call fails" per-task: + kf.set_fail_nth(node, pid=1234, nth=10) # the task's 10th faultable call fails + """ + + def __init__( + self, kubectl: KubeCtl, khaos_ns: str = DEFAULT_KHAOS_NS, khaos_label: str = DEFAULT_KHAOS_LABEL + ): + self.kubectl = kubectl + self.khaos_ns = khaos_ns + self.khaos_label = khaos_label + self._pod_cache: Dict[str, str] = {} # Cache pod names by node + self.recovery_data: Optional[Dict[str, str]] = None + + # ---------- Public API ---------- + + def enable_fault( + self, + node: str, + cap: str, + *, + probability: int = 100, + interval: int = 1, + times: int = -1, + space: int = 0, + verbose: int = 1, + extra: Optional[Dict[str, str]] = None, + ) -> None: + """Enable a fault capability (e.g., fail_page_alloc) with the given knobs.""" + pod = self._get_khaos_pod_on_node(node) + cap_path = self._cap_path_checked(pod, cap) + self._ensure_debugfs(pod) + + # Core knobs + knobs = { + "probability": str(int(probability)), + "interval": str(int(interval)), + "times": str(int(times)), + "space": str(int(space)), + "verbose": str(int(verbose)), + } + if extra: + knobs.update({k: str(v) for k, v in extra.items()}) + + for key, value in knobs.items(): + self._write(pod, f"{cap_path}/{key}", value) + + def disable_fault(self, node: str, cap: str) -> None: + """Disable a fault capability by setting probability=0.""" + pod = self._get_khaos_pod_on_node(node) + cap_path = self._cap_path_checked(pod, cap) + self._write(pod, f"{cap_path}/probability", "0") + + def set_task_filter(self, node: str, cap: str, enabled: bool) -> None: + """Enable/disable task-filter for a capability (then mark PIDs with /proc//make-it-fail=1).""" + pod = 
self._get_khaos_pod_on_node(node) + cap_path = self._cap_path_checked(pod, cap) + self._write(pod, f"{cap_path}/task-filter", "Y" if enabled else "N") + + def set_task_filter_pids(self, node: str, pids: Iterable[int], enabled: bool) -> None: + """ + Toggle /proc//make-it-fail for each PID so task-filtered faults only hit those tasks. + NOTE: This affects *all* capabilities with task-filter=Y. + """ + pod = self._get_khaos_pod_on_node(node) + val = "1" if enabled else "0" + for pid in pids: + self._write(pod, f"/proc/{int(pid)}/make-it-fail", val, must_exist=False) + + # --------- fail_function helpers --------- + + def fail_function_add(self, node: str, func: str, retval: int) -> None: + """ + Add a function to the injection list and set its retval. + The function must be annotated with ALLOW_ERROR_INJECTION() in the kernel. + """ + pod = self._get_khaos_pod_on_node(node) + base = self._cap_path_checked(pod, "fail_function") + self._write(pod, f"{base}/inject", func) + self._write(pod, f"{base}/{func}/retval", str(int(retval))) + + # Typical default knobs to ensure it triggers: + self.enable_fault(node, "fail_function", probability=100, interval=1, times=-1, verbose=1) + + def fail_function_remove(self, node: str, func: str) -> None: + """Remove a function from the injection list.""" + pod = self._get_khaos_pod_on_node(node) + base = self._cap_path_checked(pod, "fail_function") + # '!' prefix removes a function from injection list + self._write(pod, f"{base}/inject", f"!{func}") + + def fail_function_clear(self, node: str) -> None: + """Clear all functions from the injection list.""" + pod = self._get_khaos_pod_on_node(node) + base = self._cap_path_checked(pod, "fail_function") + # empty string clears the list + self._write(pod, f"{base}/inject", "") + + # --------- per-task "Nth call fails" --------- + + def set_fail_nth(self, node: str, pid: int, nth: int) -> None: + """ + Systematic faulting: write N to /proc//fail-nth — that task's Nth faultable call will fail. + Takes precedence over probability/interval. + """ + pod = self._get_khaos_pod_on_node(node) + self._write(pod, f"/proc/{int(pid)}/fail-nth", str(int(nth)), must_exist=True) + + # ---------- Internals ---------- + + def _get_khaos_pod_on_node(self, node: str) -> str: + """Get the Khaos pod name on the specified node, with caching.""" + if node in self._pod_cache: + return self._pod_cache[node] + + cmd = f"kubectl -n {shlex.quote(self.khaos_ns)} get pods -l {shlex.quote(self.khaos_label)} -o json" + out = self.kubectl.exec_command(cmd) + if not out: + raise RuntimeError(f"Failed to get pods: empty response") + + data = json.loads(out) + for item in data.get("items", []): + if item.get("spec", {}).get("nodeName") == node and item.get("status", {}).get("phase") == "Running": + pod_name = item["metadata"]["name"] + self._pod_cache[node] = pod_name + return pod_name + + raise RuntimeError(f"No running Khaos DS pod found on node {node}") + + def _cap_path_checked(self, pod: str, cap: str) -> str: + """Validate and return the capability path.""" + if cap not in FAULT_CAPS: + raise ValueError(f"Unsupported fault capability '{cap}'. Known: {', '.join(FAULT_CAPS)}") + path = FAULT_CAPS[cap] + if not self._exists(pod, path): + raise RuntimeError( + f"Capability path not found in pod {pod}: {path}. " + f"Is debugfs mounted and the kernel built with {cap}?" 
+ ) + return path + + def _ensure_debugfs(self, pod: str) -> None: + """Ensure debugfs is mounted.""" + if self._exists(pod, DEBUGFS_ROOT): + return + # Try to mount (usually not needed; your DS mounts host /sys/kernel/debug) + self._sh(pod, f"mount -t debugfs none {shlex.quote(DEBUGFS_ROOT)} || true") + + # --- pod exec helpers --- + + def _exists(self, pod: str, path: str) -> bool: + """Check if a path exists in the pod.""" + cmd = ( + f"kubectl -n {shlex.quote(self.khaos_ns)} exec {shlex.quote(pod)} -- " + f"sh -lc 'test -e {shlex.quote(path)} && echo OK || true'" + ) + out = self.kubectl.exec_command(cmd) + return (out or "").strip() == "OK" + + def _write(self, pod: str, path: str, value: str, *, must_exist: bool = True) -> None: + """Write a value to a path in the pod.""" + cmd = [ + "kubectl", + "-n", + self.khaos_ns, + "exec", + pod, + "--", + "sh", + "-lc", + f"printf %s {shlex.quote(value)} > {shlex.quote(path)} 2>/dev/null || true", + ] + rc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) + if must_exist and rc.returncode != 0: + raise RuntimeError(f"Failed to write '{value}' to {path} in {pod}: rc={rc.returncode}, err={rc.stderr}") + + def _sh(self, pod: str, script: str) -> str: + """Execute a shell script in the pod.""" + cmd = ["kubectl", "-n", self.khaos_ns, "exec", pod, "--", "sh", "-lc", script] + out = self.kubectl.exec_command(" ".join(shlex.quote(x) for x in cmd)) + return out or "" + + def _exec_on_node(self, node: str, script: str) -> str: + """Execute a script on the node using nsenter (runs in the Khaos pod on that node).""" + pod = self._get_khaos_pod_on_node(node) + cmd = [ + "kubectl", + "-n", + self.khaos_ns, + "exec", + pod, + "--", + "nsenter", + "-t", + "1", + "-m", + "-u", + "-i", + "-n", + "-p", + "sh", + "-c", + script, + ] + out = self.kubectl.exec_command(" ".join(shlex.quote(x) for x in cmd)) + return out or "" + + def _exec_with_nsenter_mount(self, node: str, script: str, check: bool = True) -> Tuple[int, str, str]: + """Execute a script using nsenter with mount namespace, returns (returncode, stdout, stderr).""" + pod = self._get_khaos_pod_on_node(node) + cmd = [ + "kubectl", + "-n", + self.khaos_ns, + "exec", + pod, + "--", + "nsenter", + "--mount=/proc/1/ns/mnt", + "bash", + "-lc", + script, + ] + rc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) + if check and rc.returncode != 0: + raise RuntimeError( + f"Command failed on node {node}: rc={rc.returncode}, stdout={rc.stdout}, stderr={rc.stderr}" + ) + return rc.returncode, rc.stdout, rc.stderr + + # ---------- loopback "test disk" helpers ---------- + + def _loop_create(self, node: str, size_gb: int = DEFAULT_SIZE_GB) -> str: + """Create a sparse file and attach a loop device. 
Returns /dev/loopN.""" + script = rf""" +set -e +mkdir -p /var/tmp +IMG={shlex.quote(DEFAULT_LOOP_IMAGE)} +[ -e "$IMG" ] || fallocate -l {int(size_gb)}G "$IMG" +LOOP=$(losetup -f --show "$IMG") +echo "$LOOP" +""" + return self._exec_on_node(node, script).strip() + + def _loop_destroy(self, node: str) -> None: + """Detach loop device created by _loop_create (best-effort).""" + script = rf""" +IMG={shlex.quote(DEFAULT_LOOP_IMAGE)} +if losetup -j "$IMG" | awk -F: '{{print $1}}' | grep -q '/dev/loop'; then + losetup -j "$IMG" | awk -F: '{{print $1}}' | xargs -r -n1 losetup -d || true +fi +""" + self._exec_on_node(node, script) + + # ---------- dm-flakey ---------- + + def dm_flakey_create( + self, node: str, name: str, dev: str, up_s: int, down_s: int, offset_sectors: int = 0, features: str = "" + ) -> None: + """ + Create a flakey device: + table: "0 flakey [1 ]" + features examples: + - "drop_writes" + - "error_writes" + - "corrupt_bio_byte 32 r 1 0" + """ + dev_q = shlex.quote(dev) + name_q = shlex.quote(name) + feat_tail = f" 1 {features}" if features else "" + script = rf""" +set -e +modprobe dm_flakey || true +SECTORS=$(blockdev --getsz {dev_q}) +dmsetup create {name_q} --table "0 $SECTORS flakey {dev_q} {int(offset_sectors)} {int(up_s)} {int(down_s)}{feat_tail}" +""" + self._exec_on_node(node, script) + + def dm_target_remove(self, node: str, name: str) -> None: + """Remove a device mapper target.""" + self._exec_on_node(node, f"dmsetup remove {shlex.quote(name)} 2>/dev/null || true") + + def dm_flakey_reload( + self, + node: str, + name: str, + up_interval: int, + down_interval: int, + features: str = "", + offset_sectors: int = 0, + num_features: Optional[int] = None, + ) -> None: + """Reload a flakey device with new parameters.""" + name_q = shlex.quote(name) + if features: + if num_features is None: + count = len(features.split()) + else: + count = num_features + feat_tail = f" {count} {features}" + else: + feat_tail = "" + + script = rf""" +set -e +# Get the underlying device from current table +UNDERLYING=$(dmsetup table {name_q} | awk '{{print $4}}') +SECTORS=$(dmsetup table {name_q} | awk '{{print $2}}') + +echo "Reloading {name_q}: up={up_interval}s down={down_interval}s features='{features}'" +echo "Underlying device: $UNDERLYING, Sectors: $SECTORS" + +# Reload the table with new parameters +dmsetup reload {name_q} --table "0 $SECTORS flakey $UNDERLYING {int(offset_sectors)} {int(up_interval)} {int(down_interval)}{feat_tail}" + +# Activate the new table (this is atomic, no unmount needed) +dmsetup resume {name_q} + +echo "dm-flakey device reloaded successfully" +dmsetup status {name_q} +""" + result = self._exec_on_node(node, script) + print(f"[dm-flakey] Reload result: {result.strip()}") + + # ---------- dm-dust ---------- + + def dm_dust_create(self, node: str, name: str, dev: str, blksz: int = DEFAULT_BLOCK_SIZE, offset: int = 0) -> None: + """ + Create a dust device that can simulate bad sectors. 
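+        Bad blocks are added afterwards via ``dmsetup message <name> 0 addbadblock <block>``
+        and only take effect once the target is enabled (see dm_dust_add_badblocks and
+        dm_dust_enable below).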
+ table: "0 dust " + """ + dev_q = shlex.quote(dev) + name_q = shlex.quote(name) + script = rf""" +set -e +modprobe dm_dust || true +SECTORS=$(blockdev --getsz {dev_q}) +dmsetup create {name_q} --table "0 $SECTORS dust {dev_q} {int(offset)} {int(blksz)}" +""" + self._exec_on_node(node, script) + + def dm_dust_add_badblocks(self, node: str, name: str, blocks: list[int]) -> None: + """Add bad blocks to a dust device.""" + name_q = shlex.quote(name) + blocks_str = " ".join(str(int(b)) for b in blocks) + + script = f""" +DM_NAME={name_q} +BLOCKS="{blocks_str}" +SUCCESS=0 +FAILED=0 +for BLOCK in $BLOCKS; do + if dmsetup message $DM_NAME 0 addbadblock $BLOCK 2>/dev/null; then + SUCCESS=$((SUCCESS + 1)) + else + FAILED=$((FAILED + 1)) + fi +done +echo "Added $SUCCESS bad blocks, $FAILED already existed or failed" +""" + result = self._exec_on_node(node, script) + print(f"[dm-dust] {result.strip()}") + + def dm_dust_add_badblocks_range(self, node: str, name: str, start: int, end: int, step: int) -> None: + """Add bad blocks using parallel execution with xargs for speed.""" + name_q = shlex.quote(name) + + script = f""" +echo "Adding bad blocks from {start} to {end} with step {step} (parallel)..." +START_TIME=$(date +%s) + +seq {start} {step} {end} | xargs -P 32 -I {{}} sh -c 'dmsetup message {name_q} 0 addbadblock {{}} 2>/dev/null' || true + +END_TIME=$(date +%s) +DURATION=$((END_TIME - START_TIME)) +COUNT=$(seq {start} {step} {end} | wc -l) +echo "Completed: Added approximately $COUNT bad blocks in $DURATION seconds" +""" + result = self._exec_on_node(node, script) + print(f"[dm-dust] {result.strip()}") + + def dm_dust_enable(self, node: str, name: str) -> None: + """Enable bad block simulation on a dust device.""" + name_q = shlex.quote(name) + result = self._exec_on_node(node, f"dmsetup message {name_q} 0 enable && dmsetup status {name_q}") + print(f"[dm-dust] Enabled. Status: {result.strip()}") + + def dm_dust_disable(self, node: str, name: str) -> None: + """Disable bad block simulation on a dust device.""" + name_q = shlex.quote(name) + result = self._exec_on_node(node, f"dmsetup message {name_q} 0 disable && dmsetup status {name_q}") + print(f"[dm-dust] Disabled. Status: {result.strip()}") + + def dm_dust_clear(self, node: str, name: str) -> None: + """Clear all bad blocks from the device.""" + name_q = shlex.quote(name) + result = self._exec_on_node(node, f"dmsetup message {name_q} 0 clearbadblocks 2>&1 || true") + print(f"[dm-dust] Clear bad blocks: {result.strip()}") + + def dm_dust_list(self, node: str, name: str) -> str: + """List all bad blocks on a dust device.""" + return self._exec_on_node(node, f"dmsetup message {shlex.quote(name)} 0 listbadblocks").strip() + + # ---------- "one-liner" recipes ---------- + + def add_bad_blocks(self, node: str, dm_device_name: str, blocks: list[int]) -> None: + """Convenience wrapper for adding bad blocks.""" + self.dm_dust_add_badblocks(node, dm_device_name, blocks) + + def enable_bad_blocks(self, node: str, dm_device_name: str, enable: bool = True) -> None: + """Convenience wrapper for enabling/disabling bad blocks.""" + if enable: + self.dm_dust_enable(node, dm_device_name) + else: + self.dm_dust_disable(node, dm_device_name) + + # ---------- Disk fault injection helpers ---------- + + def _format_and_mount(self, node: str, mapper: str, mount_point: str) -> None: + """Format and mount a device mapper device.""" + script = rf""" +set -e +if ! 
blkid {shlex.quote(mapper)} >/dev/null 2>&1; then + mkfs.ext4 -F {shlex.quote(mapper)} >/dev/null 2>&1 || true +fi +mkdir -p {shlex.quote(mount_point)} +mount {shlex.quote(mapper)} {shlex.quote(mount_point)} 2>/dev/null || true +echo {shlex.quote(mapper)} +""" + self._exec_on_node(node, script) + + def _unmount_and_cleanup(self, node: str, mount_point: str, dm_name: str) -> None: + """Unmount and remove a device mapper target.""" + script = rf""" +umount {shlex.quote(mount_point)} 2>/dev/null || true +dmsetup remove {shlex.quote(dm_name)} 2>/dev/null || true +""" + self._exec_on_node(node, script) + + def inject_disk_outage( + self, + node: str, + up_s: int = 10, + down_s: int = 5, + features: str = "", + dev: Optional[str] = None, + name: str = DEFAULT_DM_FLAKEY_NAME, + size_gb: int = DEFAULT_SIZE_GB, + ) -> str: + """ + Create a flakey DM device on the specified node. + If dev is None, creates a safe loopback disk of size_gb and wraps it. + Returns the mapper path (/dev/mapper/) you can mount/use for tests. + """ + if dev is None: + dev = self._loop_create(node, size_gb=size_gb) + + self.dm_flakey_create(node, name=name, dev=dev, up_s=up_s, down_s=down_s, features=features) + mapper = f"/dev/mapper/{name}" + mount_point = f"/mnt/{name}" + self._format_and_mount(node, mapper, mount_point) + return mapper + + def recover_disk_outage(self, node: str, name: str = DEFAULT_DM_FLAKEY_NAME) -> None: + """Unmount and remove the flakey target; also detach loop if we created one.""" + mount_point = f"/mnt/{name}" + self._unmount_and_cleanup(node, mount_point, name) + # Best effort detach loop used by our default image path + self._loop_destroy(node) + + def inject_badblocks( + self, + node: str, + blocks: Optional[list[int]] = None, + dev: Optional[str] = None, + name: str = DEFAULT_DM_DUST_NAME, + blksz: int = DEFAULT_BLOCK_SIZE, + size_gb: int = DEFAULT_SIZE_GB, + enable: bool = True, + ) -> str: + """ + Create a dust DM device and (optionally) enable failing reads on listed blocks. + If dev is None, creates a loopback disk of size_gb and wraps it. + Returns /dev/mapper/. 
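+
+        Example (illustrative; the node name is hypothetical and ``kf`` is a
+        KernelInjector as in the class docstring):
+
+            mapper = kf.inject_badblocks("worker-1", blocks=[0, 128, 256])
+            # mapper -> "/dev/mapper/khaos_dust1" for the default target name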
+ """ + if dev is None: + dev = self._loop_create(node, size_gb=size_gb) + + self.dm_dust_create(node, name=name, dev=dev, blksz=blksz) + if blocks: + self.dm_dust_add_badblocks(node, name, blocks) + if enable: + self.dm_dust_enable(node, name) + + mapper = f"/dev/mapper/{name}" + mount_point = f"/mnt/{name}" + self._format_and_mount(node, mapper, mount_point) + return mapper + + def recover_badblocks(self, node: str, name: str = DEFAULT_DM_DUST_NAME) -> None: + """Unmount and remove the dust target and detach loop if present.""" + mount_point = f"/mnt/{name}" + self._unmount_and_cleanup(node, mount_point, name) + self._loop_destroy(node) + + # ---------- LSE (Latent Sector Error) injection ---------- + + def _get_pvc_info(self, pvc_name: str, namespace: str) -> Dict[str, str]: + """Get PVC information including PV details.""" + out = self.kubectl.exec_command(f"kubectl -n {namespace} get pvc {pvc_name} -o json") + if not out: + raise RuntimeError(f"Failed to get PVC {pvc_name} in namespace {namespace}") + pvc = json.loads(out) + pv_name = pvc["spec"]["volumeName"] + + out = self.kubectl.exec_command(f"kubectl get pv {pv_name} -o json") + if not out: + raise RuntimeError(f"Failed to get PV {pv_name}") + pv = json.loads(out) + + return { + "pv_name": pv_name, + "capacity": pv["spec"]["capacity"]["storage"], + "storage_class": pv["spec"]["storageClassName"], + "local_path": pv["spec"]["local"]["path"], + } + + def _create_dm_dust_for_lse(self, node: str, local_path: str) -> None: + """Create a dm-dust device wrapping the device at local_path for LSE (Latent Sector Error) simulation.""" + script = f""" +set -e +echo 'Checking for dm_dust module...' +if ! lsmod | grep -q dm_dust; then + echo 'Loading dm_dust module...' + modprobe dm_dust || (echo 'ERROR: dm_dust module not available. Try running: sudo modprobe dm_dust' && exit 1) +else + echo 'dm_dust module already loaded' +fi + +echo 'Finding device for {shlex.quote(local_path)}...' +dev=$(findmnt -no SOURCE {shlex.quote(local_path)}) +if [ -z "$dev" ]; then + echo 'ERROR: No device found for mount point {shlex.quote(local_path)}' + exit 1 +fi + +echo "Found device: $dev" +if [ ! -b "$dev" ]; then + echo "ERROR: Device $dev is not a block device" + exit 1 +fi + +echo 'Getting device size...' +SECTORS=$(blockdev --getsz $dev) +if [ "$SECTORS" -eq 0 ]; then + echo 'ERROR: Device has 0 sectors' + exit 1 +fi + +echo "Device size: $SECTORS sectors" +echo 'Removing existing {DEFAULT_DM_LSE_NAME} if present...' +dmsetup remove {DEFAULT_DM_LSE_NAME} 2>/dev/null || true + +echo 'Creating dm-dust device...' 
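+# The dust target below is a 1:1 wrapper over the PV's backing device: same sector
+# count, 512-byte dust block size, zero offset. I/O passes through untouched until
+# bad blocks are added and the target is enabled.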
+dmsetup create {DEFAULT_DM_LSE_NAME} --table "0 $SECTORS dust $dev 0 {DEFAULT_BLOCK_SIZE}" || ( + echo 'ERROR: Failed to create dm-dust device' + dmsetup targets + exit 1 +) + +echo 'dm-dust device created successfully' +dmsetup info {DEFAULT_DM_LSE_NAME} +""" + rc, stdout, stderr = self._exec_with_nsenter_mount(node, script, check=True) + print(f"[DEBUG] Command output: {stdout}") + if stderr: + print(f"[DEBUG] Command stderr: {stderr}") + + def _generate_pv_yaml( + self, pv_name: str, capacity: str, storage_class: str, local_path: str, node: str + ) -> str: + """Generate PersistentVolume YAML.""" + return f"""apiVersion: v1 +kind: PersistentVolume +metadata: + name: {pv_name} +spec: + capacity: + storage: {capacity} + accessModes: + - ReadWriteOnce + storageClassName: {storage_class} + local: + path: {local_path} + nodeAffinity: + required: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: In + values: + - {node} + persistentVolumeReclaimPolicy: Delete +""" + + def _generate_pvc_yaml(self, pvc_name: str, namespace: str, capacity: str, storage_class: str, pv_name: str) -> str: + """Generate PersistentVolumeClaim YAML.""" + return f"""apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {pvc_name} + namespace: {namespace} +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {capacity} + storageClassName: {storage_class} + volumeName: {pv_name} +""" + + def _apply_yaml(self, yaml_content: str) -> None: + """Apply YAML content via kubectl.""" + self.kubectl.exec_command("kubectl apply -f -", input_data=yaml_content) + + def _wait_for_pvc_bound(self, pvc_name: str, namespace: str) -> None: + """Wait for PVC to become Bound.""" + cmd = [ + "kubectl", + "-n", + namespace, + "wait", + f"pvc/{pvc_name}", + "--for=condition=Bound", + f"--timeout={PVC_BOUND_TIMEOUT}", + ] + rc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) + if rc.returncode != 0: + raise RuntimeError(f"PVC {pvc_name} did not become Bound: rc={rc.returncode}, err={rc.stderr}") + + def inject_lse(self, node: str, pvc_name: str, namespace: str) -> None: + """ + Replace the target PVC with a faulty one backed by dm-dust. + This simulates Latent Sector Errors (LSE) by wrapping the underlying device. + """ + # 1. Get PVC and PV information + pv_info = self._get_pvc_info(pvc_name, namespace) + pv_name = pv_info["pv_name"] + local_path = pv_info["local_path"] + + # Store recovery data + self.recovery_data = { + "node": node, + "pvc_name": pvc_name, + "namespace": namespace, + **pv_info, + } + + # 2. Wrap underlying device with dm-dust + self._create_dm_dust_for_lse(node, local_path) + + # 3. Delete PV, then PVC + self.kubectl.exec_command(f"kubectl delete pv {pv_name}") + self.kubectl.exec_command(f"kubectl -n {namespace} delete pvc {pvc_name}") + + # 4. Recreate PV pointing at /dev/mapper/khaos_lse + faulty_pv_path = f"/dev/mapper/{DEFAULT_DM_LSE_NAME}" + new_pv_yaml = self._generate_pv_yaml( + pv_name, pv_info["capacity"], pv_info["storage_class"], faulty_pv_path, node + ) + self._apply_yaml(new_pv_yaml) + + # 5. Recreate PVC + new_pvc_yaml = self._generate_pvc_yaml( + pvc_name, namespace, pv_info["capacity"], pv_info["storage_class"], pv_name + ) + self._apply_yaml(new_pvc_yaml) + + # 6. 
Wait until PVC is Bound + self._wait_for_pvc_bound(pvc_name, namespace) + + print(f"[KernelInjector] Faulty PVC {pvc_name} reattached via dm-dust and Bound") + + def recover_lse(self) -> None: + """Restore the original PVC/PV pointing at the raw device.""" + if not self.recovery_data: + print("[KernelInjector] No recovery data found, cannot recover LSE") + return + + data = self.recovery_data + node = data["node"] + pvc_name = data["pvc_name"] + namespace = data["namespace"] + pv_name = data["pv_name"] + + # Clean up dm-dust device first + script = f"dmsetup remove {DEFAULT_DM_LSE_NAME} 2>/dev/null || true" + self._exec_with_nsenter_mount(node, script, check=False) + + # Delete faulty PVC + PV + self.kubectl.exec_command(f"kubectl -n {namespace} delete pvc {pvc_name}") + self.kubectl.exec_command(f"kubectl delete pv {pv_name}") + + # Recreate clean PV + healthy_pv_yaml = self._generate_pv_yaml( + pv_name, data["capacity"], data["storage_class"], data["local_path"], node + ) + self._apply_yaml(healthy_pv_yaml) + + # Recreate PVC + healthy_pvc_yaml = self._generate_pvc_yaml( + pvc_name, namespace, data["capacity"], data["storage_class"], pv_name + ) + self._apply_yaml(healthy_pvc_yaml) + + print(f"[KernelInjector] PVC {pvc_name} restored to healthy device") + self.recovery_data = None + + def drop_caches(self, node: str, show_log:bool = True) -> None: + """ + Drop page cache, dentries, and inodes on the target node. + This forces the application to read from the disk, hitting the bad blocks. + """ + # echo 3 > /proc/sys/vm/drop_caches + # We use sysctl -w vm.drop_caches=3 which is cleaner if available, + # but writing to /proc is more universal. + script = "echo 3 > /proc/sys/vm/drop_caches" + self._exec_on_node(node, script) + if show_log: + print(f"[KernelInjector] Dropped caches on node {node}") diff --git a/sregym/generators/fault/inject_operator.py b/sregym/generators/fault/inject_operator.py new file mode 100644 index 0000000..0e8a7b6 --- /dev/null +++ b/sregym/generators/fault/inject_operator.py @@ -0,0 +1,249 @@ +import time + +import yaml + +from sregym.generators.fault.base import FaultInjector +from sregym.service.kubectl import KubeCtl + + +class K8SOperatorFaultInjector(FaultInjector): + def __init__(self, namespace: str): + self.namespace = namespace + self.kubectl = KubeCtl() + self.kubectl.create_namespace_if_not_exist(namespace) + + def _apply_yaml(self, cr_name: str, cr_yaml: dict): + yaml_path = f"/tmp/{cr_name}.yaml" + with open(yaml_path, "w") as file: + yaml.dump(cr_yaml, file) + + command = f"kubectl apply -f {yaml_path} -n {self.namespace}" + print(f"Namespace: {self.namespace}") + result = self.kubectl.exec_command(command) + print(f"Injected {cr_name}: {result}") + + def _delete_yaml(self, cr_name: str): + yaml_path = f"/tmp/{cr_name}.yaml" + command = f"kubectl delete -f {yaml_path} -n {self.namespace}" + result = self.kubectl.exec_command(command) + print(f"Recovered from misconfiguration {cr_name}: {result}") + + def inject_overload_replicas(self): + """ + Injects a TiDB misoperation custom resource. + The misconfiguration sets an unreasonably high number of TiDB replicas. 
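+        The oversized replica count is accepted by the API server, so the expected
+        symptom is a stalled rollout with unschedulable TiDB pods rather than a
+        rejected spec.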
+ """ + cr_name = "overload-tidbcluster" + cr_yaml = { + "apiVersion": "pingcap.com/v1alpha1", + "kind": "TidbCluster", + "metadata": {"name": "basic", "namespace": self.namespace}, + "spec": { + "version": "v3.0.8", + "timezone": "UTC", + "pvReclaimPolicy": "Delete", + "pd": { + "baseImage": "pingcap/pd", + "replicas": 3, + "requests": {"storage": "1Gi"}, + "config": {}, + }, + "tikv": { + "baseImage": "pingcap/tikv", + "replicas": 3, + "requests": {"storage": "1Gi"}, + "config": {}, + }, + "tidb": { + "baseImage": "pingcap/tidb", + "replicas": 100000, # Intentional misconfiguration + "service": {"type": "ClusterIP"}, + "config": {}, + }, + }, + } + + self._apply_yaml(cr_name, cr_yaml) + + def recover_overload_replicas(self): + self.recover_fault("overload-tidbcluster") + + def inject_invalid_affinity_toleration(self): + """ + This misoperation specifies an invalid toleration effect. + """ + cr_name = "affinity-toleration-fault" + cr_yaml = { + "apiVersion": "pingcap.com/v1alpha1", + "kind": "TidbCluster", + "metadata": {"name": "basic", "namespace": self.namespace}, + "spec": { + "version": "v3.0.8", + "timezone": "UTC", + "pvReclaimPolicy": "Delete", + "pd": { + "baseImage": "pingcap/pd", + "replicas": 3, + "requests": {"storage": "1Gi"}, + "config": {}, + }, + "tikv": { + "baseImage": "pingcap/tikv", + "replicas": 3, + "requests": {"storage": "1Gi"}, + "config": {}, + }, + "tidb": { + "baseImage": "pingcap/tidb", + "replicas": 2, + "service": {"type": "ClusterIP"}, + "config": {}, + "tolerations": [ + { + "key": "test-keys", + "operator": "Equal", + "value": "test-value", + "effect": "TAKE_SOME_EFFECT", # Buggy: invalid toleration effect + "tolerationSeconds": 0, + } + ], + }, + }, + } + self._apply_yaml(cr_name, cr_yaml) + + def recover_invalid_affinity_toleration(self): + self.recover_fault("affinity-toleration-fault") + + def inject_security_context_fault(self): + """ + The fault sets an invalid runAsUser value. + """ + cr_name = "security-context-fault" + cr_yaml = { + "apiVersion": "pingcap.com/v1alpha1", + "kind": "TidbCluster", + "metadata": {"name": "basic", "namespace": self.namespace}, + "spec": { + "version": "v3.0.8", + "timezone": "UTC", + "pvReclaimPolicy": "Delete", + "pd": { + "baseImage": "pingcap/pd", + "replicas": 3, + "requests": {"storage": "1Gi"}, + "config": {}, + }, + "tikv": { + "baseImage": "pingcap/tikv", + "replicas": 3, + "requests": {"storage": "1Gi"}, + "config": {}, + }, + "tidb": { + "baseImage": "pingcap/tidb", + "replicas": 2, + "service": {"type": "ClusterIP"}, + "config": {}, + "podSecurityContext": {"runAsUser": -1}, # invalid runAsUser value + }, + }, + } + self._apply_yaml(cr_name, cr_yaml) + + def recover_security_context_fault(self): + self.recover_fault("security-context-fault") + + def inject_wrong_update_strategy(self): + """ + This fault specifies an invalid update strategy. 
+ """ + cr_name = "deployment-update-strategy-fault" + cr_yaml = { + "apiVersion": "pingcap.com/v1alpha1", + "kind": "TidbCluster", + "metadata": {"name": "basic", "namespace": self.namespace}, + "spec": { + "version": "v3.0.8", + "timezone": "UTC", + "pvReclaimPolicy": "Delete", + "pd": { + "baseImage": "pingcap/pd", + "replicas": 3, + "requests": {"storage": "1Gi"}, + "config": {}, + }, + "tikv": { + "baseImage": "pingcap/tikv", + "replicas": 3, + "requests": {"storage": "1Gi"}, + "config": {}, + }, + "tidb": { + "baseImage": "pingcap/tidb", + "replicas": 2, + "service": {"type": "ClusterIP"}, + "config": {}, + "statefulSetUpdateStrategy": "SomeStrategyForUpdate", # invalid update strategy + }, + }, + } + self._apply_yaml(cr_name, cr_yaml) + + def recover_wrong_update_strategy(self): + self.recover_fault("deployment-update-strategy-fault") + + def inject_non_existent_storage(self): + """ + This fault specifies a non-existent storage class. + """ + cr_name = "non-existent-storage-fault" + cr_yaml = { + "apiVersion": "pingcap.com/v1alpha1", + "kind": "TidbCluster", + "metadata": {"name": "basic", "namespace": self.namespace}, + "spec": { + "version": "v3.0.8", + "timezone": "UTC", + "pvReclaimPolicy": "Delete", + "pd": { + "baseImage": "pingcap/pd", + "replicas": 3, + "requests": {"storage": "1Gi"}, + "config": {}, + "storageClassName": "ThisIsAStorageClass", # non-existent storage class + }, + "tikv": { + "baseImage": "pingcap/tikv", + "replicas": 3, + "requests": {"storage": "1Gi"}, + "config": {}, + }, + "tidb": { + "baseImage": "pingcap/tidb", + "replicas": 2, + "service": {"type": "ClusterIP"}, + "config": {}, + }, + }, + } + self._apply_yaml(cr_name, cr_yaml) + + def recover_non_existent_storage(self): + self.recover_fault("non-existent-storage-fault") + + def recover_fault(self, cr_name: str): + self._delete_yaml(cr_name) + clean_url = "https://raw.githubusercontent.com/pingcap/tidb-operator/v1.6.0/examples/basic/tidb-cluster.yaml" + command = f"kubectl apply -f {clean_url} -n {self.namespace}" + result = self.kubectl.exec_command(command) + print(f"Restored clean TiDBCluster: {result}") + + +if __name__ == "__main__": + namespace = "tidb-cluster" + tidb_fault_injector = K8SOperatorFaultInjector(namespace) + + tidb_fault_injector.inject_overload_replicas() + time.sleep(10) + tidb_fault_injector.recover_overload_replicas() diff --git a/sregym/generators/fault/inject_os.py b/sregym/generators/fault/inject_os.py new file mode 100644 index 0000000..c6b1877 --- /dev/null +++ b/sregym/generators/fault/inject_os.py @@ -0,0 +1,85 @@ +"""Inject faults at the OS layer.""" + +# TODO: replace with khaos +import json +import subprocess + +import yaml + +from sregym.generators.fault.base import FaultInjector +from sregym.generators.fault.helpers import ( + get_pids_by_name, + hr_mongod_process_names, + hr_svc_process_names, + sn_svc_process_names, +) +from sregym.paths import BASE_DIR +from sregym.service.kubectl import KubeCtl + + +class OSFaultInjector(FaultInjector): + def __init__(self): + pass + + # O.1: Kernel issue via the BPF filter + def kernel_bug(self): + return NotImplementedError + + # O.2: Simulate a disk woreout failure + def inject_disk_woreout(self): + pids = [] + proc_names = hr_mongod_process_names # if it is SocialNetwork + for term in proc_names: + term_pids = get_pids_by_name(term) + print(f"Found PIDs for term '{term}': {term_pids}") + pids.extend(term_pids) + + print(f"Injecting kernel fault into processes: {pids}") + + target_syscall = "write" # syscall for disk I/O + 
error_code = -5 # EIO (Input/output error) + + if not pids: + print("No processes found to inject faults.") + return + try: + # Run err_inject with the target syscall, error code, and PIDs + # ./err_inject [ ... ] + command = [ + "sudo", + str(BASE_DIR / "generators/fault/bpf_injector/err_inject"), + target_syscall, + str(error_code), + ] + [str(pid) for pid in pids] + # print(f"Running command: {' '.join(command)}") + subprocess.run(command, check=True) + except subprocess.CalledProcessError as e: + print(f"Failed to inject fault: {e}") + + # for pid in pids: + # try: + # print(f"Stopping process {pid}") + # subprocess.run(["sudo", "kill", "-9", str(pid)], check=True) + # except subprocess.CalledProcessError as e: + # print(f"Failed to stop process {pid}: {e}") + + def recover_disk_woreout(self): + bpf_folder_path = "/sys/fs/bpf/err_inject" + try: + command = ["sudo", "rm", "-rf", bpf_folder_path] + print(f"Removing folder: {bpf_folder_path}") + subprocess.run(command, check=True) + print(f"Successfully removed {bpf_folder_path}") + except subprocess.CalledProcessError as e: + print(f"Failed to remove folder {bpf_folder_path}: {e}") + except Exception as e: + print(f"An unexpected error occurred: {e}") + + +def main(): + injector = OSFaultInjector() + injector.inject_disk_woreout() + + +if __name__ == "__main__": + main() diff --git a/sregym/generators/fault/inject_otel.py b/sregym/generators/fault/inject_otel.py new file mode 100644 index 0000000..a85a6e2 --- /dev/null +++ b/sregym/generators/fault/inject_otel.py @@ -0,0 +1,70 @@ +import json +import subprocess + +from sregym.generators.fault.base import FaultInjector +from sregym.service.kubectl import KubeCtl + + +class OtelFaultInjector(FaultInjector): + def __init__(self, namespace: str): + self.namespace = namespace + self.kubectl = KubeCtl() + self.configmap_name = "flagd-config" + + def inject_fault(self, feature_flag: str): + command = f"kubectl get configmap {self.configmap_name} -n {self.namespace} -o json" + try: + output = self.kubectl.exec_command(command) + configmap = json.loads(output) + except subprocess.CalledProcessError: + raise ValueError(f"ConfigMap '{self.configmap_name}' not found in namespace '{self.namespace}'.") + except json.JSONDecodeError: + raise ValueError(f"Error decoding JSON for ConfigMap '{self.configmap_name}'.") + + flagd_data = json.loads(configmap["data"]["demo.flagd.json"]) + + if feature_flag in flagd_data["flags"]: + flagd_data["flags"][feature_flag]["defaultVariant"] = "on" + else: + raise ValueError(f"Feature flag '{feature_flag}' not found in ConfigMap '{self.configmap_name}'.") + + updated_data = {"demo.flagd.json": json.dumps(flagd_data, indent=2)} + self.kubectl.create_or_update_configmap(self.configmap_name, self.namespace, updated_data) + + self.kubectl.exec_command(f"kubectl rollout restart deployment flagd -n {self.namespace}") + + print(f"Fault injected: Feature flag '{feature_flag}' set to 'on'.") + + def recover_fault(self, feature_flag: str): + command = f"kubectl get configmap {self.configmap_name} -n {self.namespace} -o json" + try: + output = self.kubectl.exec_command(command) + configmap = json.loads(output) + except subprocess.CalledProcessError: + raise ValueError(f"ConfigMap '{self.configmap_name}' not found in namespace '{self.namespace}'.") + except json.JSONDecodeError: + raise ValueError(f"Error decoding JSON for ConfigMap '{self.configmap_name}'.") + + flagd_data = json.loads(configmap["data"]["demo.flagd.json"]) + + if feature_flag in flagd_data["flags"]: + 
flagd_data["flags"][feature_flag]["defaultVariant"] = "off" + else: + raise ValueError(f"Feature flag '{feature_flag}' not found in ConfigMap '{self.configmap_name}'.") + + updated_data = {"demo.flagd.json": json.dumps(flagd_data, indent=2)} + self.kubectl.create_or_update_configmap(self.configmap_name, self.namespace, updated_data) + + self.kubectl.exec_command(f"kubectl rollout restart deployment flagd -n {self.namespace}") + print(f"Fault recovered: Feature flag '{feature_flag}' set to 'off'.") + + +# Example usage: +# if __name__ == "__main__": +# namespace = "astronomy-shop" +# feature_flag = "adServiceFailure" + +# injector = OtelFaultInjector(namespace) + +# injector.inject_fault(feature_flag) +# injector.recover_fault(feature_flag) diff --git a/sregym/generators/fault/inject_remote_os.py b/sregym/generators/fault/inject_remote_os.py new file mode 100644 index 0000000..bdb6a1d --- /dev/null +++ b/sregym/generators/fault/inject_remote_os.py @@ -0,0 +1,225 @@ +"""Inject faults at the OS layer.""" + +# TODO: replace with khaos +import json +import subprocess +import os +import time + +import paramiko +from paramiko.client import AutoAddPolicy + +import yaml + +from sregym.generators.fault.base import FaultInjector +from sregym.generators.fault.helpers import ( + get_pids_by_name, + hr_mongod_process_names, + hr_svc_process_names, + sn_svc_process_names, +) +from sregym.paths import BASE_DIR +from sregym.service.kubectl import KubeCtl + + +# a script to create a process, keep send SIGTERM to kubelet +KILL_KUBELET_SCRIPT = """ +#!/bin/bash +while true; do + sudo pkill -TERM kubelet + sleep 1 +done +""" + +class RemoteOSFaultInjector(FaultInjector): + def __init__(self): + self.kubectl = KubeCtl() + + def check_remote_host(self): + # kubectl get nodes -o json, if (kind-worker) is in the nodes, return False + cmd = "kubectl get nodes" + out = self.kubectl.exec_command(cmd) + if "kind-worker" in out: + print("You are using Kind.") + return False + + # try to find the script/ansible/inventory.yml, if it does not exist, return False + if not os.path.exists(f"{BASE_DIR}/../scripts/ansible/inventory.yml"): + print("Inventory file not found: " + f"{BASE_DIR}/../scripts/ansible/inventory.yml") + return False + return True + + def get_host_info(self): + # read the script/ansible/inventory.yml, and return the host info + worker_info = {} + with open(f"{BASE_DIR}/../scripts/ansible/inventory.yml", "r") as f: + inventory = yaml.safe_load(f) + + # Extract variables from all.vars + variables = {} + if "all" in inventory and "vars" in inventory["all"]: + variables = inventory["all"]["vars"] + + # get all the workers + if "all" in inventory and "children" in inventory["all"] and "worker_nodes" in inventory["all"]["children"]: + workers = inventory["all"]["children"]["worker_nodes"]["hosts"] + for worker in workers: + ansible_host = workers[worker]["ansible_host"] + ansible_user = workers[worker]["ansible_user"] + + # Replace variables in ansible_user + ansible_user = self._replace_variables(ansible_user, variables) + + # Skip if variables couldn't be resolved + if "{{" in ansible_user: + print(f"Warning: Unresolved variables in {worker} user: {ansible_user}") + continue + + worker_info[ansible_host] = ansible_user + return worker_info + + print(f"No worker nodes found in the inventory file, your cluster is not applicable for this fault injector") + return None + + def _replace_variables(self, text: str, variables: dict) -> str: + """Replace {{ variable_name }} with actual values from variables dict.""" + 
import re + + def replace_var(match): + var_name = match.group(1).strip() + if var_name in variables: + return str(variables[var_name]) + else: + print(f"Warning: Variable '{var_name}' not found in variables") + return match.group(0) # Return original if not found + + # Replace {{ variable_name }} patterns + return re.sub(r'\{\{\s*(\w+)\s*\}\}', replace_var, text) + + def inject_kubelet_crash(self): + # inject a script to create a process, keep send SIGTERM to kubelet + if not self.check_remote_host(): + print("Your cluster is not applicable for this fault injector, It should be remote.") + return + + self.worker_info = self.get_host_info() + if not self.worker_info: + return + + for host, user in self.worker_info.items(): + if self.exist_script_on_host(host, user, "kill_kubelet.sh"): + print("Kubelet killer already exists on the host. Cleaning up...") + self.clean_up_script_on_host(host, user, "kill_kubelet.sh") + + for host, user in self.worker_info.items(): + pid = self.inject_script_on_host(host, user, KILL_KUBELET_SCRIPT, "kill_kubelet.sh") + if pid: + print(f"Successfully started kubelet killer on {host} with PID {pid}") + else: + print(f"Failed to start kubelet killer on {host}") + return + return + + def recover_kubelet_crash(self): + if not self.check_remote_host(): + print("No need to clean up.") + return + if not hasattr(self, "worker_info"): + self.worker_info = self.get_host_info() + for host, user in self.worker_info.items(): + self.clean_up_script_on_host(host, user, "kill_kubelet.sh") + time.sleep(3) + return + + ###### Helpers ###### + def exist_script_on_host(self, host: str, user: str, script_name: str): + ssh = paramiko.SSHClient() + ssh.set_missing_host_key_policy(AutoAddPolicy()) + ssh.connect(host, username=user) + stdin, stdout, stderr = ssh.exec_command(f"ls /tmp/{script_name}") + return script_name in stdout.read().decode() + + def inject_script_on_host(self, host: str, user: str, script: str, script_name: str): + # ssh into the host, and write a script to create a process, keep send SIGTERM to kubelet + ssh = paramiko.SSHClient() + ssh.set_missing_host_key_policy(AutoAddPolicy()) + script_path = f"/tmp/{script_name}" + log_path = f"/tmp/{script_name}.log" + + try: + ssh.connect(host, username=user) + # Create a script file on the remote host + print(f"Connected to {host} with user {user}") + + sftp = ssh.open_sftp() + with sftp.file(script_path, 'w') as f: + f.write(script) + sftp.close() + + # Make the script executable and run it in background + # First make it executable + stdin, stdout, stderr = ssh.exec_command(f"chmod +x {script_path}") + stdout.channel.recv_exit_status() # Wait for completion + + # Then start the script in background and capture PID + # Use a more reliable method to get PID + cmd = f"nohup {script_path} > {log_path} 2>&1 & echo $! 
> /tmp/{script_name}.pid" + stdin, stdout, stderr = ssh.exec_command(cmd) + stdout.channel.recv_exit_status() # Wait for completion + + # Read the PID from the file + stdin, stdout, stderr = ssh.exec_command(f"cat /tmp/{script_name}.pid") + pid = stdout.readline().strip() + print(f"Executed command {cmd} on {host}") + print(f"Read PID from file: {pid}") + print(f"Started {script_name} on {host} with PID {pid}") + # Store the PID for later cleanup + return pid + + except Exception as e: + print(f"Failed to start {script_name} on {host}: {e}") + return None + finally: + ssh.close() + + def clean_up_script_on_host(self, host: str, user: str, script_name: str): + """Clean up the script on the remote host.""" + ssh = paramiko.SSHClient() + ssh.set_missing_host_key_policy(AutoAddPolicy()) + script_path = f"/tmp/{script_name}" + log_path = f"/tmp/{script_name}.log" + pid_path = f"/tmp/{script_name}.pid" + + try: + ssh.connect(host, username=user) + + # First, try to read the PID from the file + stdin, stdout, stderr = ssh.exec_command(f"cat {pid_path} 2>/dev/null") + pid = stdout.readline().strip() + + if pid: + # Kill the process and clean up the script + cmd = f"kill {pid} 2>/dev/null; rm -f {script_path} {log_path} {pid_path}" + stdin, stdout, stderr = ssh.exec_command(cmd) + print(f"Cleaned up {script_name} on {host} (PID {pid})") + print(f"Waiting for kubelet to restart...") + time.sleep(3) + + except Exception as e: + print(f"Failed to clean up {script_name} on {host}: {e}") + finally: + ssh.close() + + +def main(): + print("Testing RemoteOSFaultInjector") + injector = RemoteOSFaultInjector() + print("Injecting kubelet crash...") + injector.inject_kubelet_crash() + input("Press Enter to recover kubelet crash") + print("Recovering kubelet crash...") + injector.recover_kubelet_crash() + + +if __name__ == "__main__": + main() diff --git a/sregym/generators/fault/inject_tt.py b/sregym/generators/fault/inject_tt.py new file mode 100644 index 0000000..b6dec6a --- /dev/null +++ b/sregym/generators/fault/inject_tt.py @@ -0,0 +1,174 @@ +import json +import logging +import yaml +from typing import Dict, List, Any, Optional +import time + +from sregym.generators.fault.base import FaultInjector +from sregym.service.kubectl import KubeCtl + +logger = logging.getLogger(__name__) + + +class TrainTicketFaultInjector(FaultInjector): + + def __init__(self, namespace: str = "train-ticket"): + super().__init__(namespace) + self.namespace = namespace + self.kubectl = KubeCtl() + self.configmap_name = "flagd-config" + self.flagd_deployment = "flagd" + + self.fault_mapping = { + "fault-1-async-message-sequence-control": "F1: Asynchronous message delivery lacks sequence control", + "fault-2-data-request-order-inconsistency": "F2: Different data requests for the same report are returned in an unexpected order", + "fault-3-jvm-docker-config-mismatch": "F3: JVM configurations are inconsistent with Docker configurations", + "fault-4-ssl-offloading-granularity": "F4: SSL offloading happens in a fine granularity (happening in almost each Docker instance)", + "fault-5-high-load-timeout-cascade": "F5: The high load of a type of requests causes the timeout failure of another type of requests", + "fault-6-sql-error-recursive-requests": "F6: Endless recursive requests of a microservice are caused by SQL errors of another dependent microservice", + "fault-7-third-party-service-overload": "F7: The overload of requests to a third-party service leads to denial of service", + "fault-8-request-key-propagation-failure": "F8: The 
key in the request of one microservice is not passed to its dependent microservice", + "fault-9-css-bidirectional-display-error": "F9: There is a CSS display style error in bi-directional", + "fault-10-bom-api-unexpected-output": "F10: An API used in a special case of BOM updating returns unexpected output", + "fault-11-bom-data-sequence-error": "F11: The BOM data is updated in an unexpected sequence", + "fault-12-price-status-query-chain-error": "F12: Price status querying does not consider an unexpected output of a microservice in its call chain", + "fault-13-price-optimization-order-error": "F13: Price optimization steps are executed in an unexpected order", + "fault-14-locked-product-cpi-calculation-error": "F14: There is a mistake in including the locked product in CPI calculation", + "fault-15-spark-actor-system-config-error": "F15: The spark actor is used for the configuration of actorSystem (part of Apache Spark) instead of the system actor", + "fault-16-spray-max-content-length-limit": "F16: The 'max-content-length' configuration of spray is only 2 Mb, not allowing to support to upload a big file", + "fault-17-nested-sql-select-clause-error": "F17: Too many nested 'select' and 'from' clauses are in the constructed SQL statement", + "fault-18-json-chart-data-null-value": "F18: One key of the returned JSON data for the UI chart includes the null value", + "fault-19-product-price-french-format-error": "F19: The product price is not formatted correctly in the French format", + "fault-20-jboss-db2-jar-classpath-error": "F20: The JBoss startup classpath parameter does not include the right DB2 jar package", + "fault-21-aria-labeled-accessibility-error": "F21: The 'aria-labeled-by' element for accessibility cannot be located by the JAWS", + "fault-22-sql-column-name-mismatch-error": "F22: The constructed SQL statement includes a wrong column name in the 'select' part according to its 'from' part", + } + + def inject_fault(self, fault_type: str) -> bool: + print(f"[TrainTicket] Injecting fault: {fault_type}") + return self._set_fault_state(fault_type, "on") + + def recover_fault(self, fault_type: str) -> bool: + print(f"[TrainTicket] Recovering from fault: {fault_type}") + return self._set_fault_state(fault_type, "off") + + def _get_configmap(self) -> Dict[str, Any]: + try: + result = self.kubectl.exec_command( + f"kubectl get configmap {self.configmap_name} -n {self.namespace} -o json" + ) + return json.loads(result) if result else {} + + except Exception as e: + logger.error(f"Error getting ConfigMap: {e}") + return {} + + def _set_fault_state(self, fault_type: str, state: str) -> bool: + """Update fault state in ConfigMap. 
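+        The flag's defaultVariant is rewritten in the flagd ConfigMap, the change is
+        verified by re-reading the ConfigMap, and flagd is restarted so it serves the
+        new value.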
+ + Args: + fault_type: Name of the fault (e.g., 'fault-6-sql-error-recursive-requests') + state: 'on' or 'off' + """ + print(f"Setting {fault_type} to {state}...") + + configmap = self._get_configmap() + if not configmap: + print("Failed to get ConfigMap") + return False + + flags_yaml = configmap["data"]["flags.yaml"] + flags_data = yaml.safe_load(flags_yaml) + + if fault_type not in flags_data["flags"]: + print(f"Fault '{fault_type}' not found in ConfigMap") + return False + + # Update fault state + flags_data["flags"][fault_type]["defaultVariant"] = state + + updated_yaml = yaml.dump(flags_data, default_flow_style=False) + + try: + result = self.kubectl.update_configmap( + name=self.configmap_name, namespace=self.namespace, data={"flags.yaml": updated_yaml} + ) + + if result: + print(f"✅ {fault_type} set to {state}") + + verification = self._get_configmap() + if verification and "data" in verification: + flags_verification = yaml.safe_load(verification["data"]["flags.yaml"]) + actual_value = flags_verification["flags"][fault_type]["defaultVariant"] + if actual_value == state: + print(f"✅ ConfigMap verified: {fault_type} = {state}") + else: + print(f"❌ ConfigMap verification failed: expected {state}, got {actual_value}") + return False + + self._restart_flagd() + print("✅ flagd restarted successfully") + + print("Sleeping for 20 seconds to flag value change to take effect...") + time.sleep(20) + return True + else: + print("Failed to update ConfigMap") + return False + + except Exception as e: + print(f"❌ Error updating fault: {e}") + return False + + def _restart_flagd(self): + print(f"[TrainTicket] Restarting flagd deployment...") + + try: + result = self.kubectl.exec_command( + f"kubectl rollout restart deployment/{self.flagd_deployment} -n {self.namespace}" + ) + print(f"[TrainTicket] flagd deployment restarted successfully: {result}") + + except Exception as e: + logger.error(f"Error restarting flagd: {e}") + + def get_fault_status(self, fault_type: str) -> str: + try: + result = self.kubectl.exec_command( + f"kubectl get configmap {self.configmap_name} -n {self.namespace} -o jsonpath='{{.data.flags\\.yaml}}'" + ) + + if result and fault_type in result: + import yaml + + flags_data = yaml.safe_load(result) + + if "flags" in flags_data and fault_type in flags_data["flags"]: + return flags_data["flags"][fault_type].get("defaultVariant", "unknown") + + except Exception as e: + logger.error(f"Error getting fault status: {e}") + + return "unknown" + + def list_available_faults(self) -> List[str]: + return list(self.fault_mapping.keys()) + + def get_fault_description(self, fault_name: str) -> Optional[str]: + return self.fault_mapping.get(fault_name) + + # Override base class methods to use feature flag-based fault injection + def _inject(self, fault_type: str, microservices: list[str] = None, duration: str = None): + """Override base class _inject to use feature flag-based injection.""" + print(f"[TrainTicket] Using feature flag injection for: {fault_type}") + return self.inject_fault(fault_type) + + def _recover(self, fault_type: str, microservices: list[str] = None): + """Override base class _recover to use feature flag-based recovery.""" + print(f"[TrainTicket] Using feature flag recovery for: {fault_type}") + return self.recover_fault(fault_type) + + +if __name__ == "__main__": + print("TrainTicketFaultInjector - Use via SREGym CLI") diff --git a/sregym/generators/fault/inject_virtual.py b/sregym/generators/fault/inject_virtual.py new file mode 100644 index 0000000..cd138cb --- 
/dev/null +++ b/sregym/generators/fault/inject_virtual.py @@ -0,0 +1,2128 @@ +"""Inject faults at the virtualization layer: K8S, Docker, etc.""" + +import copy +import json +import time +from pathlib import Path + +import yaml +from kubernetes import config + +from sregym.generators.fault.base import FaultInjector +from sregym.paths import TARGET_MICROSERVICES +from sregym.service.apps.base import Application +from sregym.service.helm import Helm +from sregym.service.kubectl import KubeCtl + + +class VirtualizationFaultInjector(FaultInjector): + def __init__(self, namespace: str): + super().__init__(namespace) + self.namespace = namespace + self.kubectl = KubeCtl() + self.mongo_service_pod_map = { + "url-shorten-mongodb": "url-shorten-service", + } + + def delete_service_pods(self, target_service_pods: list[str]): + """Kill the corresponding service pod to enforce the fault.""" + for pod in target_service_pods: + delete_pod_command = f"kubectl delete pod {pod} -n {self.namespace}" + delete_result = self.kubectl.exec_command(delete_pod_command) + print(f"Deleted service pod {pod} to enforce the fault: {delete_result}") + + ############# FAULT LIBRARY ################ + + # V.1 - misconfig_k8s: Misconfigure service port in Kubernetes - Misconfig + def inject_misconfig_k8s(self, microservices: list[str]): + """Inject a fault to misconfigure service's target port in Kubernetes.""" + for service in microservices: + service_config = self._modify_target_port_config( + from_port=9090, + to_port=9999, + configs=self.kubectl.get_service_json(service, self.testbed), + ) + + print(f"Misconfig fault for service: {service} | namespace: {self.testbed}") + self.kubectl.patch_service(service, self.testbed, service_config) + + def recover_misconfig_k8s(self, microservices: list[str]): + for service in microservices: + service_config = self._modify_target_port_config( + from_port=9999, + to_port=9090, + configs=self.kubectl.get_service_json(service, self.testbed), + ) + + print(f"Recovering for service: {service} | namespace: {self.testbed}") + self.kubectl.patch_service(service, self.testbed, service_config) + + # V.2 - auth_miss_mongodb: Authentication missing for MongoDB - Auth + def inject_auth_miss_mongodb(self, microservices: list[str]): + """Inject a fault to enable TLS for a MongoDB service. + + NOTE: modifies the values.yaml file for the service. The fault is created + by forcing the service to require TLS for connections, which will fail if + the certificate is not provided. + + NOTE: mode: requireTLS, certificateKeyFile, and CAFile are required fields. 
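+
+        Roughly the CLI equivalent of:
+            helm upgrade social-network <chart-path> -n <namespace>
+                --set url-shorten-mongodb.tls.mode=requireTLS
+                --set url-shorten-mongodb.tls.certificateKeyFile=/etc/tls/tls.pem
+                --set url-shorten-mongodb.tls.CAFile=/etc/tls/ca.crt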
+ """ + for service in microservices: + # Prepare the set values for helm upgrade + set_values = { + "url-shorten-mongodb.tls.mode": "requireTLS", + "url-shorten-mongodb.tls.certificateKeyFile": "/etc/tls/tls.pem", + "url-shorten-mongodb.tls.CAFile": "/etc/tls/ca.crt", + } + + # Define Helm upgrade configurations + helm_args = { + "release_name": "social-network", + "chart_path": TARGET_MICROSERVICES / "socialNetwork/helm-chart/socialnetwork/", + "namespace": self.namespace, + "values_file": TARGET_MICROSERVICES / "socialNetwork/helm-chart/socialnetwork/values.yaml", + "set_values": set_values, + } + + Helm.upgrade(**helm_args) + + pods = self.kubectl.list_pods(self.namespace) + target_service_pods = [ + pod.metadata.name for pod in pods.items if self.mongo_service_pod_map[service] in pod.metadata.name + ] + print(f"Target Service Pods: {target_service_pods}") + self.delete_service_pods(target_service_pods) + + self.kubectl.exec_command(f"kubectl rollout restart deployment {service} -n {self.namespace}") + + def recover_auth_miss_mongodb(self, microservices: list[str]): + for service in microservices: + set_values = { + "url-shorten-mongodb.tls.mode": "disabled", + "url-shorten-mongodb.tls.certificateKeyFile": "", + "url-shorten-mongodb.tls.CAFile": "", + } + + helm_args = { + "release_name": "social-network", + "chart_path": TARGET_MICROSERVICES / "socialNetwork/helm-chart/socialnetwork/", + "namespace": self.namespace, + "values_file": TARGET_MICROSERVICES / "socialNetwork/helm-chart/socialnetwork/values.yaml", + "set_values": set_values, + } + + Helm.upgrade(**helm_args) + + pods = self.kubectl.list_pods(self.namespace) + target_service_pods = [ + pod.metadata.name for pod in pods.items if self.mongo_service_pod_map[service] in pod.metadata.name + ] + print(f"Target Service Pods: {target_service_pods}") + + self.delete_service_pods(target_service_pods) + self.kubectl.exec_command(f"kubectl rollout restart deployment {service} -n {self.namespace}") + + # V.3 - scale_pods_to_zero: Scale pods to zero - Deploy/Operation + def inject_scale_pods_to_zero(self, microservices: list[str]): + """Inject a fault to scale pods to zero for a service.""" + for service in microservices: + self.kubectl.exec_command(f"kubectl scale deployment {service} --replicas=0 -n {self.namespace}") + print(f"Scaled deployment {service} to 0 replicas | namespace: {self.namespace}") + + def recover_scale_pods_to_zero(self, microservices: list[str]): + for service in microservices: + self.kubectl.exec_command(f"kubectl scale deployment {service} --replicas=1 -n {self.namespace}") + print(f"Scaled deployment {service} back to 1 replica | namespace: {self.namespace}") + + # V.4 - assign_to_non_existent_node: Assign to non-existent or NotReady node - Dependency + def inject_assign_to_non_existent_node(self, microservices: list[str]): + """Inject a fault to assign a service to a non-existent or NotReady node.""" + non_existent_node_name = "extra-node" + for service in microservices: + deployment_yaml = self._get_deployment_yaml(service) + deployment_yaml["spec"]["template"]["spec"]["nodeSelector"] = { + "kubernetes.io/hostname": non_existent_node_name + } + + # Write the modified YAML to a temporary file + modified_yaml_path = self._write_yaml_to_file(service, deployment_yaml) + + delete_command = f"kubectl delete deployment {service} -n {self.namespace}" + self.kubectl.exec_command(delete_command) + + apply_command = f"kubectl apply -f {modified_yaml_path} -n {self.namespace}" + self.kubectl.exec_command(apply_command) + 
print(f"Redeployed {service} to node {non_existent_node_name}.") + + def recover_assign_to_non_existent_node(self, microservices: list[str]): + for service in microservices: + deployment_yaml = self._get_deployment_yaml(service) + if "nodeSelector" in deployment_yaml["spec"]["template"]["spec"]: + del deployment_yaml["spec"]["template"]["spec"]["nodeSelector"] + + modified_yaml_path = self._write_yaml_to_file(service, deployment_yaml) + + delete_command = f"kubectl delete deployment {service} -n {self.namespace}" + self.kubectl.exec_command(delete_command) + + apply_command = f"kubectl apply -f {modified_yaml_path} -n {self.namespace}" + self.kubectl.exec_command(apply_command) + print(f"Removed nodeSelector for service {service} and redeployed.") + + # --- V.5 - PVC claim name mismatch (per-service) --- + def inject_pvc_claim_mismatch(self, microservices: list[str]): + """Make pods Pending by pointing Deployments at a non-existent PVC claim.""" + for service in microservices: + dep = self._get_deployment_yaml(service) + original = copy.deepcopy(dep) + + pod_spec = dep.get("spec", {}).get("template", {}).get("spec", {}) + volumes = pod_spec.get("volumes", []) + changed = False + + for v in volumes: + pvc = v.get("persistentVolumeClaim") + if pvc and "claimName" in pvc: + pvc["claimName"] = pvc["claimName"] + "-broken" + changed = True + + if not changed: + print(f"[{service}] No PVC volumes found; skipping.") + continue + + modified = self._write_yaml_to_file(service, dep) + + # Replace the deployment with the modified one + self.kubectl.exec_command(f"kubectl delete deployment {service} -n {self.namespace}") + self.kubectl.exec_command(f"kubectl apply -f {modified} -n {self.namespace}") + + # Save the original for recovery + self._write_yaml_to_file(service, original) + + print(f"[{service}] Patched claimName -> (…-broken). Pods should go Pending.") + + self.kubectl.wait_for_stable(self.namespace) + + def recover_pvc_claim_mismatch(self, microservices: list[str]): + """Restore the original Deployment YAML saved in /tmp/{svc}_modified.yaml.""" + for service in microservices: + orig_path = f"/tmp/{service}_modified.yaml" + self.kubectl.exec_command(f"kubectl delete deployment {service} -n {self.namespace}") + self.kubectl.exec_command(f"kubectl apply -f {orig_path} -n {self.namespace}") + print(f"[{service}] Restored original claimName.") + + self.kubectl.wait_for_ready(self.namespace) + + # --- V.6 - Storage provisioner outage (cluster-scoped) --- + # TODO: This fault does not work because the PVCs are bound before fault injection + # def inject_storage_provisioner_outage(self): + # """ + # Make all new PVCs Pending by disabling common local provisioners. + # No-op if a target provisioner isn't present. 
+ # """ + # cmds = [ + # # OpenEBS localPV provisioner + # "kubectl -n openebs scale deploy openebs-localpv-provisioner --replicas=0", + # # Rancher/Kind local-path provisioner + # "kubectl -n local-path-storage scale deploy local-path-provisioner --replicas=0", + # ] + # for c in cmds: + # try: + # self.kubectl.exec_command(c) + # print(f"Ran: {c}") + # except Exception as e: + # print(f"Skipping: {c} ({e})") + + # print("Storage provisioner outage injected.") + + # def recover_storage_provisioner_outage(self): + # cmds = [ + # "kubectl -n openebs scale deploy openebs-localpv-provisioner --replicas=1", + # "kubectl -n local-path-storage scale deploy local-path-provisioner --replicas=1", + # "kubectl -n kube-system scale deploy hostpath-provisioner --replicas=1", + # ] + # for c in cmds: + # try: + # self.kubectl.exec_command(c) + # print(f"Ran: {c}") + # except Exception as e: + # print(f"Skipping: {c} ({e})") + + # # Give the controller a moment and ensure PVCs start binding again + # self.kubectl.wait_for_stable(self.namespace) + # print("✅ Storage provisioner outage recovered.") + + # V.6 - wrong binary usage incident + def inject_wrong_bin_usage(self, microservices: list[str]): + """Inject a fault to use the wrong binary of a service.""" + for service in microservices: + deployment_yaml = self._get_deployment_yaml(service) + + # Modify the deployment YAML to use the 'geo' binary instead of the 'profile' binary + containers = deployment_yaml["spec"]["template"]["spec"]["containers"] + for container in containers: + if "command" in container and "profile" in container["command"]: + print(f"Changing binary for container {container['name']} from 'profile' to 'geo'.") + container["command"] = ["geo"] # Replace 'profile' with 'geo' + + modified_yaml_path = self._write_yaml_to_file(service, deployment_yaml) + + # Delete the deployment and re-apply + delete_command = f"kubectl delete deployment {service} -n {self.namespace}" + apply_command = f"kubectl apply -f {modified_yaml_path} -n {self.namespace}" + self.kubectl.exec_command(delete_command) + self.kubectl.exec_command(apply_command) + + print(f"Injected wrong binary usage fault for service: {service}") + + def recover_wrong_bin_usage(self, microservices: list[str]): + for service in microservices: + deployment_yaml = self._get_deployment_yaml(service) + + containers = deployment_yaml["spec"]["template"]["spec"]["containers"] + for container in containers: + if "command" in container and "geo" in container["command"]: + print(f"Reverting binary for container {container['name']} from 'geo' to 'profile'.") + container["command"] = ["profile"] # Restore 'geo' back to 'profile' + + modified_yaml_path = self._write_yaml_to_file(service, deployment_yaml) + + delete_command = f"kubectl delete deployment {service} -n {self.namespace}" + apply_command = f"kubectl apply -f {modified_yaml_path} -n {self.namespace}" + self.kubectl.exec_command(delete_command) + self.kubectl.exec_command(apply_command) + + print(f"Recovered from wrong binary usage fault for service: {service}") + + # V.7 - Inject a fault by deleting the specified service + def inject_missing_service(self, microservices: list[str]): + """Inject a fault by deleting the specified service.""" + for service in microservices: + service_yaml_file = self._get_service_yaml(service) + delete_service_command = f"kubectl delete service {service} -n {self.namespace}" + result = self.kubectl.exec_command(delete_service_command) + print(f"Deleted service {service} to enforce the fault: {result}") + 
+ self._write_yaml_to_file(service, service_yaml_file) + + # Restart all the pods + self.kubectl.exec_command(f"kubectl delete pods --all -n {self.namespace}") + self.kubectl.wait_for_stable(namespace=self.namespace) + + def recover_missing_service(self, microservices: list[str]): + """Recover the fault by recreating the specified service.""" + for service in microservices: + delete_service_command = f"kubectl delete service {service} -n {self.namespace}" + result = self.kubectl.exec_command(delete_service_command) + create_service_command = f"kubectl apply -f /tmp/{service}_modified.yaml -n {self.namespace}" + result = self.kubectl.exec_command(create_service_command) + print(f"Recreated service {service} to recover from the fault: {result}") + + # V.8 - Inject a fault by modifying the resource request of a service + def inject_resource_request(self, microservices: list[str], memory_limit_func): + """Inject a fault by modifying the resource request of a service.""" + for service in microservices: + original_deployment_yaml = self._get_deployment_yaml(service) + deployment_yaml = memory_limit_func(original_deployment_yaml) + modified_yaml_path = self._write_yaml_to_file(service, deployment_yaml) + + # Delete the deployment and re-apply + delete_command = f"kubectl delete deployment {service} -n {self.namespace}" + apply_command = f"kubectl apply -f {modified_yaml_path} -n {self.namespace}" + self.kubectl.exec_command(delete_command) + self.kubectl.exec_command(apply_command) + + self._write_yaml_to_file(service, original_deployment_yaml) + + def recover_resource_request(self, microservices: list[str]): + """Recover the fault by restoring the original resource request of a service.""" + for service in microservices: + # Delete the deployment and re-apply + delete_command = f"kubectl delete deployment {service} -n {self.namespace}" + apply_command = f"kubectl apply -f /tmp/{service}_modified.yaml -n {self.namespace}" + self.kubectl.exec_command(delete_command) + self.kubectl.exec_command(apply_command) + + print(f"Recovered from resource request fault for service: {service}") + + # V.9 - Manually patch a service's selector to include an additional label + def inject_wrong_service_selector(self, microservices: list[str]): + for service in microservices: + print(f"Injecting wrong selector for service: {service} | namespace: {self.namespace}") + + service_config = self.kubectl.get_service_json(service, self.namespace) + current_selectors = service_config.get("spec", {}).get("selector", {}) + + # Adding a wrong selector to the service + current_selectors["current_service_name"] = service + service_config["spec"]["selector"] = current_selectors + self.kubectl.patch_service(service, self.namespace, service_config) + + print(f"Patched service {service} with selector {service_config['spec']['selector']}") + + def recover_wrong_service_selector(self, microservices: list[str]): + for service in microservices: + service_config = self.kubectl.get_service_json(service, self.namespace) + + service_config = self.kubectl.get_service_json(service, self.namespace) + current_selectors = service_config.get("spec", {}).get("selector", {}) + + # Set the key to None to delete it from the live object + current_selectors["current_service_name"] = None + service_config["spec"]["selector"] = current_selectors + self.kubectl.patch_service(service, self.namespace, service_config) + + print(f"Recovered from wrong service selector fault for service: {service}") + + # V.10 - Inject service DNS resolution failure by 
patching CoreDNS ConfigMap + def inject_service_dns_resolution_failure(self, microservices: list[str]): + for service in microservices: + fqdn = f"{service}.{self.namespace}.svc.cluster.local" + + # Get configmap as structured data + cm_yaml = self.kubectl.exec_command("kubectl -n kube-system get cm coredns -o yaml") + cm_data = yaml.safe_load(cm_yaml) + corefile = cm_data["data"]["Corefile"] + + start_line_id = f"template ANY ANY {fqdn} {{" + if start_line_id in corefile: + print("NXDOMAIN template already present; recovering from previous injection") + self.recover_service_dns_resolution_failure([service]) + + # Re-fetch after recovery + cm_yaml = self.kubectl.exec_command("kubectl -n kube-system get cm coredns -o yaml") + cm_data = yaml.safe_load(cm_yaml) + corefile = cm_data["data"]["Corefile"] + + # Create the NXDOMAIN template block + template_block = ( + f" template ANY ANY {fqdn} {{\n" + f' match "^{fqdn}\\.$"\n' + f" rcode NXDOMAIN\n" + f" fallthrough\n" + f" }}\n" + ) + + # Find the position of "kubernetes" word + kubernetes_pos = corefile.find("kubernetes") + if kubernetes_pos == -1: + print("Could not locate 'kubernetes' plugin in Corefile") + return + + # Find the start of the line containing "kubernetes" + line_start = corefile.rfind("\n", 0, kubernetes_pos) + if line_start == -1: + line_start = 0 + else: + line_start += 1 + + # Insert template block before the kubernetes line + new_corefile = corefile[:line_start] + template_block + corefile[line_start:] + + cm_data["data"]["Corefile"] = new_corefile + + # Apply using temporary file + tmp_file_path = self._write_yaml_to_file("coredns", cm_data) + + self.kubectl.exec_command(f"kubectl apply -f {tmp_file_path}") + + # Restart CoreDNS + self.kubectl.exec_command("kubectl -n kube-system rollout restart deployment coredns") + self.kubectl.exec_command("kubectl -n kube-system rollout status deployment coredns --timeout=30s") + + print(f"Injected Service DNS Resolution Failure fault for service: {service}") + + def recover_service_dns_resolution_failure(self, microservices: list[str]): + for service in microservices: + fqdn = f"{service}.{self.namespace}.svc.cluster.local" + + # Get configmap as structured data + cm_yaml = self.kubectl.exec_command("kubectl -n kube-system get cm coredns -o yaml") + cm_data = yaml.safe_load(cm_yaml) + corefile = cm_data["data"]["Corefile"] + + start_line_id = f"template ANY ANY {fqdn} {{" + if start_line_id not in corefile: + print("No NXDOMAIN template found; nothing to do") + return + + lines = corefile.split("\n") + new_lines = [] + skip_block = False + + for line in lines: + # Start of template block + if not skip_block and start_line_id in line: + skip_block = True + continue + + # End of template block + if skip_block and line.strip() == "}": + skip_block = False + continue + + # Skip lines inside the block + if skip_block: + continue + + # Keep all other lines + new_lines.append(line) + + if skip_block: + print("WARNING: Template block was not properly closed") + return + + new_corefile = "\n".join(new_lines) + + # Verify if the removal worked + if start_line_id in new_corefile: + print("ERROR: Template was not successfully removed!") + return + + cm_data["data"]["Corefile"] = new_corefile + + # Apply using temporary file + tmp_file_path = self._write_yaml_to_file("coredns", cm_data) + self.kubectl.exec_command(f"kubectl apply -f {tmp_file_path}") + + # Restart CoreDNS + self.kubectl.exec_command("kubectl -n kube-system rollout restart deployment coredns") + self.kubectl.exec_command("kubectl 
-n kube-system rollout status deployment coredns --timeout=30s") + + print(f"Recovered Service DNS Resolution Failure fault for service: {service}") + + # V.11 - Inject a fault by modifying the DNS policy of a service + def inject_wrong_dns_policy(self, microservices: list[str]): + for service in microservices: + patch = ( + '[{"op":"replace","path":"/spec/template/spec/dnsPolicy","value":"None"},' + '{"op":"add","path":"/spec/template/spec/dnsConfig","value":' + '{"nameservers":["8.8.8.8"],"searches":[]}}]' + ) + patch_cmd = f"kubectl patch deployment {service} -n {self.namespace} --type json -p '{patch}'" + result = self.kubectl.exec_command(patch_cmd) + print(f"Patch result for {service}: {result}") + + self.kubectl.exec_command(f"kubectl rollout restart deployment {service} -n {self.namespace}") + self.kubectl.exec_command(f"kubectl rollout status deployment {service} -n {self.namespace}") + + # Check if nameserver 8.8.8.8 present in the pods + self._wait_for_dns_policy_propagation(service, external_ns="8.8.8.8", expect_external=True) + + print(f"Injected wrong DNS policy fault for service: {service}") + + def recover_wrong_dns_policy(self, microservices: list[str]): + for service in microservices: + patch = ( + '[{"op":"remove","path":"/spec/template/spec/dnsPolicy"},' + '{"op":"remove","path":"/spec/template/spec/dnsConfig"}]' + ) + patch_cmd = f"kubectl patch deployment {service} -n {self.namespace} --type json -p '{patch}'" + result = self.kubectl.exec_command(patch_cmd) + print(f"Patch result for {service}: {result}") + + self.kubectl.exec_command(f"kubectl rollout restart deployment {service} -n {self.namespace}") + self.kubectl.exec_command(f"kubectl rollout status deployment {service} -n {self.namespace}") + + # Check if nameserver 8.8.8.8 absent in the pods + self._wait_for_dns_policy_propagation(service, external_ns="8.8.8.8", expect_external=False) + + print(f"Recovered wrong DNS policy fault for service: {service}") + + # V.12 - Inject a stale CoreDNS config breaking all .svc.cluster.local DNS resolution + def inject_stale_coredns_config(self, microservices: list[str] = None): + # Get configmap as structured data + cm_yaml = self.kubectl.exec_command("kubectl -n kube-system get cm coredns -o yaml") + cm_data = yaml.safe_load(cm_yaml) + corefile = cm_data["data"]["Corefile"] + + # Check if our template is already present (look for the exact line we inject) + template_id = "template ANY ANY svc.cluster.local" + if template_id in corefile: + print("Cluster DNS failure template already present; recovering from previous injection") + self.recover_stale_coredns_config() + + # Re-fetch after recovery + cm_yaml = self.kubectl.exec_command("kubectl -n kube-system get cm coredns -o yaml") + cm_data = yaml.safe_load(cm_yaml) + corefile = cm_data["data"]["Corefile"] + + # Create the NXDOMAIN template block + template_block = ( + " template ANY ANY svc.cluster.local {\n" + ' match ".*\\.svc\\.cluster\\.local\\.?$"\n' + " rcode NXDOMAIN\n" + " }\n" + ) + + # Find the position of "kubernetes" word + kubernetes_pos = corefile.find("kubernetes") + if kubernetes_pos == -1: + print("Could not locate 'kubernetes' plugin in Corefile") + return + + # Find the start of the line containing "kubernetes" + line_start = corefile.rfind("\n", 0, kubernetes_pos) + if line_start == -1: + line_start = 0 + else: + line_start += 1 + + # Insert template block before the kubernetes line + new_corefile = corefile[:line_start] + template_block + corefile[line_start:] + + cm_data["data"]["Corefile"] = 
new_corefile + + # Apply using temporary file + tmp_file_path = self._write_yaml_to_file("coredns", cm_data) + + self.kubectl.exec_command(f"kubectl apply -f {tmp_file_path}") + + # Restart CoreDNS + self.kubectl.exec_command("kubectl -n kube-system rollout restart deployment coredns") + self.kubectl.exec_command("kubectl -n kube-system rollout status deployment coredns --timeout=30s") + + print("Injected stale CoreDNS config for all .svc.cluster.local domains") + + def recover_stale_coredns_config(self, microservices: list[str] = None): + + # Get configmap as structured data + cm_yaml = self.kubectl.exec_command("kubectl -n kube-system get cm coredns -o yaml") + cm_data = yaml.safe_load(cm_yaml) + corefile = cm_data["data"]["Corefile"] + + # Check if our template is present + template_id = "template ANY ANY svc.cluster.local" + if template_id not in corefile: + print("No cluster DNS failure template found; nothing to do") + return + + lines = corefile.split("\n") + new_lines = [] + skip_block = False + + for line in lines: + # Start of template block + if not skip_block and template_id in line: + skip_block = True + continue + + # End of template block + if skip_block and line.strip() == "}": + skip_block = False + continue + + # Skip lines inside the block + if skip_block: + continue + + # Keep all other lines + new_lines.append(line) + + if skip_block: + print("WARNING: Template block was not properly closed") + return + + new_corefile = "\n".join(new_lines) + + # Verify if the removal worked + if template_id in new_corefile: + print("ERROR: Template was not successfully removed!") + return + + cm_data["data"]["Corefile"] = new_corefile + + # Apply using temporary file + tmp_file_path = self._write_yaml_to_file("coredns", cm_data) + self.kubectl.exec_command(f"kubectl apply -f {tmp_file_path}") + + # Restart CoreDNS + self.kubectl.exec_command("kubectl -n kube-system rollout restart deployment coredns") + self.kubectl.exec_command("kubectl -n kube-system rollout status deployment coredns --timeout=30s") + + print("Recovered from stale CoreDNS config for all .svc.cluster.local domains") + + # V.13 - Inject a sidecar container that binds to the same port as the main container (port conflict) + def inject_sidecar_port_conflict(self, microservices: list[str]): + for service in microservices: + + original_deployment_yaml = self._get_deployment_yaml(service) + deployment_yaml = copy.deepcopy(original_deployment_yaml) + + containers = deployment_yaml["spec"]["template"]["spec"]["containers"] + + main_container = containers[0] if containers else {} + default_port = 8080 + port = default_port + ports_list = main_container.get("ports", []) + if ports_list: + port = ports_list[0].get("containerPort", default_port) + + sidecar_container = { + "name": "sidecar", + "image": "busybox:latest", + "command": [ + "sh", + "-c", + f"exec nc -lk -p {port}", + ], + "ports": [ + { + "containerPort": port, + } + ], + } + + containers.append(sidecar_container) + + modified_yaml_path = self._write_yaml_to_file(service, deployment_yaml) + + delete_cmd = f"kubectl delete deployment {service} -n {self.namespace}" + apply_cmd = f"kubectl apply -f {modified_yaml_path} -n {self.namespace}" + + delete_result = self.kubectl.exec_command(delete_cmd) + print(f"Delete result for {service}: {delete_result}") + + apply_result = self.kubectl.exec_command(apply_cmd) + print(f"Apply result for {service}: {apply_result}") + + # Save the *original* deployment YAML for recovery + self._write_yaml_to_file(service, 
original_deployment_yaml) + + self.kubectl.wait_for_stable(self.namespace) + + print(f"Injected sidecar port conflict fault for service: {service}") + + def recover_sidecar_port_conflict(self, microservices: list[str]): + for service in microservices: + delete_cmd = f"kubectl delete deployment {service} -n {self.namespace}" + apply_cmd = f"kubectl apply -f /tmp/{service}_modified.yaml -n {self.namespace}" + + delete_result = self.kubectl.exec_command(delete_cmd) + print(f"Delete result for {service}: {delete_result}") + + apply_result = self.kubectl.exec_command(apply_cmd) + print(f"Apply result for {service}: {apply_result}") + + self.kubectl.wait_for_ready(self.namespace) + + print(f"Recovered from sidecar port conflict fault for service: {service}") + + # Inject a liveness probe too aggressive fault + def inject_liveness_probe_too_aggressive(self, microservices: list[str]): + for service in microservices: + + script_path = Path(__file__).parent / "custom" / f"slow_service.py" + self.deploy_custom_service(service, script_path) + + deployment_yaml = self._get_deployment_yaml(service) + original_deployment_yaml = copy.deepcopy(deployment_yaml) + + containers = deployment_yaml["spec"]["template"]["spec"]["containers"] + + for container in containers: + probe = container.get("livenessProbe") + if probe: + probe["initialDelaySeconds"] = 0 + probe["periodSeconds"] = 1 + probe["failureThreshold"] = 1 + + deployment_yaml["spec"]["template"]["spec"]["terminationGracePeriodSeconds"] = 0 + + modified_yaml_path = self._write_yaml_to_file(service, deployment_yaml) + + delete_command = f"kubectl delete deployment {service} -n {self.namespace}" + apply_command = f"kubectl apply -f {modified_yaml_path} -n {self.namespace}" + + delete_result = self.kubectl.exec_command(delete_command) + print(f"Delete result for {service}: {delete_result}") + + apply_result = self.kubectl.exec_command(apply_command) + print(f"Apply result for {service}: {apply_result}") + + # Save the *original* deployment YAML for recovery + self._write_yaml_to_file(service, original_deployment_yaml) + + self.kubectl.wait_for_stable(self.namespace) + + print(f"Injected liveness probe too aggressive fault for service: {service}") + + def recover_liveness_probe_too_aggressive(self, microservices: list[str]): + for service in microservices: + original_yaml_path = f"/tmp/{service}_modified.yaml" + + delete_command = f"kubectl delete deployment {service} -n {self.namespace}" + apply_command = f"kubectl apply -f {original_yaml_path} -n {self.namespace}" + + delete_result = self.kubectl.exec_command(delete_command) + print(f"Delete result for {service}: {delete_result}") + + apply_result = self.kubectl.exec_command(apply_command) + print(f"Apply result for {service}: {apply_result}") + + self.kubectl.wait_for_ready(self.namespace) + + print(f"Recovered from liveness probe too aggressive fault for service: {service}") + + # V.14 - Injects an environment variable leak by deleting a ConfigMap and restarting the associated deployment. 
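+    # The ConfigMap deleted depends on the namespace (media-mongodb for
+    # social-network, mongo-geo-script for hotel-reservation); the original object
+    # is backed up to /tmp/<service>_modified.yaml so recover_missing_configmap can
+    # re-apply it.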
+ def inject_missing_configmap(self, microservices: list[str]): + for microservice in microservices: + configmap_name = None + if self.namespace == "social-network": + configmap_name = "media-mongodb" + elif self.namespace == "hotel-reservation": + configmap_name = "mongo-geo-script" + else: + raise ValueError(f"Unknown namespace: {self.namespace}") + + get_cmd = f"kubectl get configmap {configmap_name} -n {self.namespace} -o yaml" + original_yaml = self.kubectl.exec_command(get_cmd) + parsed_yaml = yaml.safe_load(original_yaml) + + self._write_yaml_to_file(microservice, parsed_yaml) + + delete_cmd = f"kubectl delete configmap {configmap_name} -n {self.namespace}" + self.kubectl.exec_command(delete_cmd) + print(f"Deleted ConfigMap: {configmap_name}") + + restart_cmd = f"kubectl rollout restart deployment {microservice} -n {self.namespace}" + self.kubectl.exec_command(restart_cmd) + print(f"Restarted pods to apply ConfigMap fault") + + def recover_missing_configmap(self, microservices: list[str]): + for microservice in microservices: + configmap_name = f"{microservice}" + backup_path = f"/tmp/{configmap_name}_modified.yaml" + + apply_cmd = f"kubectl apply -f {backup_path} -n {self.namespace}" + self.kubectl.exec_command(apply_cmd) + print(f"Restored ConfigMap: {configmap_name}") + + self.kubectl.exec_command(f"kubectl rollout restart deployment {microservice} -n {self.namespace}") + self.kubectl.exec_command(f"kubectl rollout status deployment {microservice} -n {self.namespace}") + print(f"Deployment {microservice} restarted and should now be healthy") + + # Inject ConfigMap drift by removing critical keys + def inject_configmap_drift(self, microservices: list[str]): + + for service in microservices: + + # Read the actual config.json from the running pod + read_config_cmd = f"kubectl exec deployment/{service} -n {self.namespace} -- cat /go/src/github.com/harlow/go-micro-services/config.json" + config_json_str = self.kubectl.exec_command(read_config_cmd) + original_config = json.loads(config_json_str) + print(f"Read original config from {service} pod") + + # Save the original config to a file for recovery + original_config_path = f"/tmp/{service}-original-config.json" + with open(original_config_path, "w") as f: + json.dump(original_config, f, indent=2) + print(f"Saved original config to {original_config_path}") + + fault_config = copy.deepcopy(original_config) + key_to_remove = None + + if service == "geo" and "GeoMongoAddress" in fault_config: + del fault_config["GeoMongoAddress"] + key_to_remove = "GeoMongoAddress" + else: + print(f"Service {service} not supported for ConfigMap drift fault") + continue + + configmap_name = f"{service}-config" + fault_config_json = json.dumps(fault_config, indent=2) + + create_cm_cmd = f"""kubectl create configmap {configmap_name} -n {self.namespace} --from-literal=config.json='{fault_config_json}' --dry-run=client -o yaml | kubectl apply -f -""" + self.kubectl.exec_command(create_cm_cmd) + print(f"Created ConfigMap {configmap_name} with {key_to_remove} removed") + + json_patch = [ + { + "op": "add", + "path": "/spec/template/spec/volumes/-", + "value": {"name": "config-volume", "configMap": {"name": configmap_name}}, + }, + { + "op": "add", + "path": "/spec/template/spec/containers/0/volumeMounts/-", + "value": { + "name": "config-volume", + "mountPath": "/go/src/github.com/harlow/go-micro-services/config.json", + "subPath": "config.json", + }, + }, + ] + + # Check if volumes array exists, if not create it + check_volumes_cmd = ( + f"kubectl get 
deployment {service} -n {self.namespace} -o jsonpath='{{.spec.template.spec.volumes}}'" + ) + volumes_exist = self.kubectl.exec_command(check_volumes_cmd).strip() + + if not volumes_exist or volumes_exist == "[]": + # Need to create the volumes array first + json_patch[0]["op"] = "add" + json_patch[0]["path"] = "/spec/template/spec/volumes" + json_patch[0]["value"] = [json_patch[0]["value"]] + + # Check if volumeMounts array exists + check_mounts_cmd = f"kubectl get deployment {service} -n {self.namespace} -o jsonpath='{{.spec.template.spec.containers[0].volumeMounts}}'" + mounts_exist = self.kubectl.exec_command(check_mounts_cmd).strip() + + if not mounts_exist or mounts_exist == "[]": + # Need to create the volumeMounts array first + json_patch[1]["op"] = "add" + json_patch[1]["path"] = "/spec/template/spec/containers/0/volumeMounts" + json_patch[1]["value"] = [json_patch[1]["value"]] + + patch_json_str = json.dumps(json_patch) + patch_cmd = f"kubectl patch deployment {service} -n {self.namespace} --type='json' -p='{patch_json_str}'" + patch_result = self.kubectl.exec_command(patch_cmd) + print(f"Patch result for {service}: {patch_result}") + + self.kubectl.exec_command(f"kubectl rollout status deployment/{service} -n {self.namespace} --timeout=30s") + + print(f"Injected ConfigMap drift fault for service: {service} - removed {key_to_remove}") + + def recover_configmap_drift(self, microservices: list[str]): + + for service in microservices: + # Use the same ConfigMap name as in injection + configmap_name = f"{service}-config" + + # Read the saved original config instead of trying to read from the pod + original_config_path = f"/tmp/{service}-original-config.json" + with open(original_config_path, "r") as f: + original_config = json.load(f) + print(f"Read original config from saved file: {original_config_path}") + + original_config_json = json.dumps(original_config, indent=2) + update_cm_cmd = f"""kubectl create configmap {configmap_name} -n {self.namespace} --from-literal=config.json='{original_config_json}' --dry-run=client -o yaml | kubectl apply -f -""" + self.kubectl.exec_command(update_cm_cmd) + print(f"Updated ConfigMap {configmap_name} with complete configuration") + + self.kubectl.exec_command(f"kubectl rollout restart deployment/{service} -n {self.namespace}") + self.kubectl.exec_command(f"kubectl rollout status deployment/{service} -n {self.namespace} --timeout=30s") + + print(f"Recovered ConfigMap drift fault for service: {service}") + + # V.14 - Inject a readiness probe misconfiguration fault + def inject_readiness_probe_misconfiguration(self, microservices: list[str]): + for service in microservices: + + deployment_yaml = self._get_deployment_yaml(service) + original_deployment_yaml = copy.deepcopy(deployment_yaml) + + containers = deployment_yaml["spec"]["template"]["spec"]["containers"] + + initial_delay = 10 + + for container in containers: + container["readinessProbe"] = { + "httpGet": {"path": f"/healthz", "port": 8080}, + "initialDelaySeconds": initial_delay, + "periodSeconds": 10, + "failureThreshold": 1, + } + + modified_yaml_path = self._write_yaml_to_file(service, deployment_yaml) + + delete_command = f"kubectl delete deployment {service} -n {self.namespace}" + apply_command = f"kubectl apply -f {modified_yaml_path} -n {self.namespace}" + + delete_result = self.kubectl.exec_command(delete_command) + print(f"Delete result for {service}: {delete_result}") + + apply_result = self.kubectl.exec_command(apply_command) + print(f"Apply result for {service}: {apply_result}") 
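+ # Delete-then-apply (rather than a patch) forces the pods to be recreated with the new
+ # probe settings; /healthz on port 8080 is assumed not to be served by these services,
+ # so the readiness probe keeps failing and the pods never become Ready.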
+ + # Save the *original* deployment YAML for recovery + self._write_yaml_to_file(service, original_deployment_yaml) + + print(f"Injected readiness probe misconfiguration fault for service: {service}") + + def recover_readiness_probe_misconfiguration(self, microservices: list[str]): + for service in microservices: + + original_yaml_path = f"/tmp/{service}_modified.yaml" + + delete_command = f"kubectl delete deployment {service} -n {self.namespace}" + apply_command = f"kubectl apply -f {original_yaml_path} -n {self.namespace}" + + delete_result = self.kubectl.exec_command(delete_command) + print(f"Delete result for {service}: {delete_result}") + + apply_result = self.kubectl.exec_command(apply_command) + print(f"Apply result for {service}: {apply_result}") + + self.kubectl.wait_for_ready(self.namespace) + + print(f"Recovered from readiness probe misconfiguration fault for service: {service}") + + # V.15 - Inject a liveness probe misconfiguration fault + def inject_liveness_probe_misconfiguration(self, microservices: list[str]): + for service in microservices: + + deployment_yaml = self._get_deployment_yaml(service) + original_deployment_yaml = copy.deepcopy(deployment_yaml) + + containers = deployment_yaml["spec"]["template"]["spec"]["containers"] + initial_delay = 10 + + for container in containers: + container["livenessProbe"] = { + "httpGet": {"path": f"/healthz", "port": 8080}, + "initialDelaySeconds": initial_delay, + "periodSeconds": 10, + "failureThreshold": 1, + } + + # Set terminationGracePeriodSeconds at the pod template spec level (not inside a container spec) + deployment_yaml["spec"]["template"]["spec"]["terminationGracePeriodSeconds"] = 0 + + modified_yaml_path = self._write_yaml_to_file(service, deployment_yaml) + + delete_command = f"kubectl delete deployment {service} -n {self.namespace}" + apply_command = f"kubectl apply -f {modified_yaml_path} -n {self.namespace}" + + delete_result = self.kubectl.exec_command(delete_command) + print(f"Delete result for {service}: {delete_result}") + + apply_result = self.kubectl.exec_command(apply_command) + print(f"Apply result for {service}: {apply_result}") + + # Save the *original* deployment YAML for recovery + self._write_yaml_to_file(service, original_deployment_yaml) + + print(f"Injected liveness probe misconfiguration fault for service: {service}") + + def recover_liveness_probe_misconfiguration(self, microservices: list[str]): + for service in microservices: + original_yaml_path = f"/tmp/{service}_modified.yaml" + + delete_command = f"kubectl delete deployment {service} -n {self.namespace}" + apply_command = f"kubectl apply -f {original_yaml_path} -n {self.namespace}" + + delete_result = self.kubectl.exec_command(delete_command) + print(f"Delete result for {service}: {delete_result}") + + apply_result = self.kubectl.exec_command(apply_command) + print(f"Apply result for {service}: {apply_result}") + + self.kubectl.wait_for_ready(self.namespace) + + print(f"Recovered from liveness probe misconfiguration fault for service: {service}") + + # Duplicate PVC mounts multiple replicas share ReadWriteOnce PVC causing mount conflict + def inject_duplicate_pvc_mounts(self, microservices: list[str]): + for service in microservices: + + deployment_yaml = self._get_deployment_yaml(service) + # original_yaml = copy.deepcopy(deployment_yaml) + + # Create a single PVC that every replica will try to use + pvc_name = f"{service}-pvc" + pvc_manifest = { + "apiVersion": "v1", + "kind": "PersistentVolumeClaim", + "metadata": {"name": pvc_name, 
"namespace": self.namespace}, + "spec": {"accessModes": ["ReadWriteOnce"], "resources": {"requests": {"storage": "1Gi"}}}, + } + + pvc_json = json.dumps(pvc_manifest) + self.kubectl.exec_command(f"kubectl apply -f - < 1 to create potential deadlock + if "replicas" not in deployment_yaml["spec"] or deployment_yaml["spec"]["replicas"] < 2: + deployment_yaml["spec"]["replicas"] = 3 # Force multiple replicas + + # Create anti-affinity rules that prevent pods from being scheduled on same nodes + anti_affinity_rules = { + "podAntiAffinity": { + "requiredDuringSchedulingIgnoredDuringExecution": [ + { + "labelSelector": { + "matchExpressions": [{"key": "app", "operator": "In", "values": [service]}] + }, + "topologyKey": "kubernetes.io/hostname", + } + ] + } + } + + # Add affinity to deployment spec + if "affinity" not in deployment_yaml["spec"]["template"]["spec"]: + deployment_yaml["spec"]["template"]["spec"]["affinity"] = {} + + deployment_yaml["spec"]["template"]["spec"]["affinity"].update(anti_affinity_rules) + + # Write the modified YAML to a temporary file + modified_yaml_path = self._write_yaml_to_file(service, deployment_yaml) + + # Delete and redeploy with anti-affinity rules + delete_command = f"kubectl delete deployment {service} -n {self.namespace}" + self.kubectl.exec_command(delete_command) + + apply_command = f"kubectl apply -f {modified_yaml_path} -n {self.namespace}" + self.kubectl.exec_command(apply_command) + + print(f"Injected pod anti-affinity deadlock for service: {service}") + print(f" - Set replicas to {deployment_yaml['spec']['replicas']}") + print(f" - Added strict anti-affinity rules") + + def recover_pod_anti_affinity_deadlock(self, microservices: list[str]): + """ + Recover from pod anti-affinity deadlock by removing anti-affinity rules. 
+ """ + for service in microservices: + deployment_yaml = self._get_deployment_yaml(service) + + # Remove affinity rules + if "affinity" in deployment_yaml["spec"]["template"]["spec"]: + if "podAntiAffinity" in deployment_yaml["spec"]["template"]["spec"]["affinity"]: + del deployment_yaml["spec"]["template"]["spec"]["affinity"]["podAntiAffinity"] + + # If affinity is now empty, remove it entirely + if not deployment_yaml["spec"]["template"]["spec"]["affinity"]: + del deployment_yaml["spec"]["template"]["spec"]["affinity"] + + # Reset replicas to 1 for recovery + deployment_yaml["spec"]["replicas"] = 1 + + # Write the modified YAML to a temporary file + modified_yaml_path = self._write_yaml_to_file(service, deployment_yaml) + + # Delete and redeploy without anti-affinity rules + delete_command = f"kubectl delete deployment {service} -n {self.namespace}" + self.kubectl.exec_command(delete_command) + + apply_command = f"kubectl apply -f {modified_yaml_path} -n {self.namespace}" + self.kubectl.exec_command(apply_command) + + print(f"Recovered pod anti-affinity deadlock for service: {service}") + print(f" - Removed anti-affinity rules") + print(f" - Reset replicas to 1") + + def inject_rpc_timeout_retries_misconfiguration(self, configmap: str): + GRPC_CLIENT_TIMEOUT = "50ms" + GRPC_CLIENT_RETRIES_ON_ERROR = "30" + config_patch_command = f'kubectl patch configmap {configmap} -n {self.namespace} -p \'{{"data":{{"GRPC_CLIENT_TIMEOUT":"{GRPC_CLIENT_TIMEOUT}","GRPC_CLIENT_RETRIES_ON_ERROR":"{GRPC_CLIENT_RETRIES_ON_ERROR}"}}}}\'' + self.kubectl.exec_command(config_patch_command) + deployment_rollout_command = f"kubectl rollout restart deployment -l configmap={configmap} -n {self.namespace}" + self.kubectl.exec_command(deployment_rollout_command) + self.kubectl.wait_for_ready(self.namespace) + + def recover_rpc_timeout_retries_misconfiguration(self, configmap: str): + GRPC_CLIENT_TIMEOUT = "1s" + GRPC_CLIENT_RETRIES_ON_ERROR = "1" + config_patch_command = f'kubectl patch configmap {configmap} -n {self.namespace} -p \'{{"data":{{"GRPC_CLIENT_TIMEOUT":"{GRPC_CLIENT_TIMEOUT}","GRPC_CLIENT_RETRIES_ON_ERROR":"{GRPC_CLIENT_RETRIES_ON_ERROR}"}}}}\'' + self.kubectl.exec_command(config_patch_command) + deployment_rollout_command = f"kubectl rollout restart deployment -l configmap={configmap} -n {self.namespace}" + self.kubectl.exec_command(deployment_rollout_command) + self.kubectl.wait_for_ready(self.namespace) + + def inject_daemon_set_image_replacement(self, daemon_set_name: str, new_image: str): + daemon_set_yaml = self._get_daemon_set_yaml(daemon_set_name) + + # print(f"Daemon set yaml: {daemon_set_yaml}") + + # Replace the image in all containers + if "spec" in daemon_set_yaml and "template" in daemon_set_yaml["spec"]: + template_spec = daemon_set_yaml["spec"]["template"]["spec"] + if "containers" in template_spec: + for container in template_spec["containers"]: + if "image" in container: + container["image"] = new_image + + modified_yaml_path = self._write_yaml_to_file(daemon_set_name, daemon_set_yaml) # backup the yaml + + self.kubectl.exec_command(f"kubectl apply -f {modified_yaml_path}") + self.kubectl.exec_command(f"kubectl rollout restart ds {daemon_set_name} -n {self.namespace}") + self.kubectl.exec_command(f"kubectl rollout status ds {daemon_set_name} -n {self.namespace} --timeout=60s") + + def recover_daemon_set_image_replacement(self, daemon_set_name: str, original_image: str): + daemon_set_yaml = self._get_daemon_set_yaml(daemon_set_name) + if "spec" in daemon_set_yaml and "template" in 
daemon_set_yaml["spec"]: + template_spec = daemon_set_yaml["spec"]["template"]["spec"] + if "containers" in template_spec: + for container in template_spec["containers"]: + if "image" in container and container["image"] != original_image: + container["image"] = original_image + modified_yaml_path = self._write_yaml_to_file(daemon_set_name, daemon_set_yaml) + self.kubectl.exec_command(f"kubectl apply -f {modified_yaml_path}") + self.kubectl.exec_command(f"kubectl rollout restart ds {daemon_set_name} -n {self.namespace}") + self.kubectl.exec_command( + f"kubectl rollout status ds {daemon_set_name} -n {self.namespace} --timeout=60s" + ) + return + + def inject_rbac_misconfiguration(self, microservices: list[str]): + for service in microservices: + configmap_manifest = { + "apiVersion": "v1", + "kind": "ConfigMap", + "metadata": {"name": "app-routing-config", "namespace": self.namespace}, + "data": {"routes.json": '{"enabled": true, "version": "1.0"}'}, + } + + cm_json = json.dumps(configmap_manifest) + self.kubectl.exec_command(f"kubectl apply -f - < /dev/null 2>&1; do + sleep 1 +done +echo "MongoDB started" + +# Create the admin user (will fail if the user already exists) +echo "Creating admin user..." +mongo admin --eval "db.createUser({user: '$ADMIN_USER', pwd: '$ADMIN_PWD', roles:[{role:'userAdminAnyDatabase',db:'admin'}]});" + +# Grant readWrite role on the target database +echo "Granting readWrite role to $ADMIN_USER on $TARGET_DB database..." +mongo admin -u $ADMIN_USER -p $ADMIN_PWD --authenticationDatabase admin \ + --eval "db.grantRolesToUser('$ADMIN_USER', [{role: '$READ_WRITE_ROLE', db: '$TARGET_DB'}]);" + +# Create the root user (will fail if the user already exists) +echo "Creating root user..." +mongo admin --eval "db.createUser({user: '$ROOT_USER', pwd: '$ROOT_PWD', roles:[{role:'userAdminAnyDatabase',db:'admin'}]});" + +# Grant readWrite role on the target database to root +echo "Granting readWrite role to $ROOT_USER on $TARGET_DB database..." +mongo admin -u $ROOT_USER -p $ROOT_PWD --authenticationDatabase admin \ + --eval "db.grantRolesToUser('$ROOT_USER', [{role: '$READ_WRITE_ROLE', db: '$TARGET_DB'}]);" + + +echo "Initialization script completed" \ No newline at end of file diff --git a/sregym/generators/fault/script/k8s-rate-mongo.sh b/sregym/generators/fault/script/k8s-rate-mongo.sh new file mode 100644 index 0000000..8da30e7 --- /dev/null +++ b/sregym/generators/fault/script/k8s-rate-mongo.sh @@ -0,0 +1,38 @@ +#!/bin/bash + + +ADMIN_USER="admin" +ADMIN_PWD="admin" + +ROOT_USER="root" +ROOT_PWD="root" + +TARGET_DB="rate-db" +READ_WRITE_ROLE="readWrite" + +echo "Waiting for MongoDB to start..." +until mongo --eval "print('waited for connection')" > /dev/null 2>&1; do + sleep 1 +done +echo "MongoDB started" + +# Create the admin user (will fail if the user already exists) +echo "Creating admin user..." +mongo admin --eval "db.createUser({user: '$ADMIN_USER', pwd: '$ADMIN_PWD', roles:[{role:'userAdminAnyDatabase',db:'admin'}]});" + +# Grant readWrite role on the target database +echo "Granting readWrite role to $ADMIN_USER on $TARGET_DB database..." +mongo admin -u $ADMIN_USER -p $ADMIN_PWD --authenticationDatabase admin \ + --eval "db.grantRolesToUser('$ADMIN_USER', [{role: '$READ_WRITE_ROLE', db: '$TARGET_DB'}]);" + +# Create the root user (will fail if the user already exists) +echo "Creating root user..." 
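+# As with the admin user above, creation runs unauthenticated and the readWrite grant is
+# issued in a separate, authenticated step below.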
+mongo admin --eval "db.createUser({user: '$ROOT_USER', pwd: '$ROOT_PWD', roles:[{role:'userAdminAnyDatabase',db:'admin'}]});" + +# Grant readWrite role on the target database to root +echo "Granting readWrite role to $ROOT_USER on $TARGET_DB database..." +mongo admin -u $ROOT_USER -p $ROOT_PWD --authenticationDatabase admin \ + --eval "db.grantRolesToUser('$ROOT_USER', [{role: '$READ_WRITE_ROLE', db: '$TARGET_DB'}]);" + + +echo "Initialization script completed" \ No newline at end of file diff --git a/sregym/generators/fault/script/remove-admin-mongo.sh b/sregym/generators/fault/script/remove-admin-mongo.sh new file mode 100644 index 0000000..980a239 --- /dev/null +++ b/sregym/generators/fault/script/remove-admin-mongo.sh @@ -0,0 +1,12 @@ +#!/bin/bash + + +ROOT_USER="root" +ROOT_PWD="root" + +echo "Removing admin user..." + +mongo admin -u $ROOT_USER -p $ROOT_PWD --authenticationDatabase admin \ + --eval "db.dropUser('admin');" + +echo "Admin user removed successfully" diff --git a/sregym/generators/fault/script/remove-mitigate-admin-geo-mongo.sh b/sregym/generators/fault/script/remove-mitigate-admin-geo-mongo.sh new file mode 100644 index 0000000..117404d --- /dev/null +++ b/sregym/generators/fault/script/remove-mitigate-admin-geo-mongo.sh @@ -0,0 +1,25 @@ +#!/bin/bash + + +ROOT_USER="root" +ROOT_PWD="root" + +ADMIN_USER="admin" +ADMIN_PWD="admin" +READ_WRITE_ROLE="readWrite" +TARGET_DB="geo-db" + +echo "Recreating admin user..." + +# Connect to MongoDB and create the admin user +mongo admin -u $ROOT_USER -p $ROOT_PWD --authenticationDatabase admin \ + --eval "db.createUser({user: '$ADMIN_USER', pwd: '$ADMIN_PWD', roles:[{role:'userAdminAnyDatabase',db:'admin'}]});" + +echo "Admin user recreated" + +# Grant readWrite role on the target database +echo "Granting readWrite role to $ADMIN_USER on $TARGET_DB database..." +mongo admin -u $ROOT_USER -p $ROOT_PWD --authenticationDatabase admin \ + --eval "db.grantRolesToUser('$ADMIN_USER', [{role: '$READ_WRITE_ROLE', db: '$TARGET_DB'}]);" + +echo "Privileges restored successfully" diff --git a/sregym/generators/fault/script/remove-mitigate-admin-rate-mongo.sh b/sregym/generators/fault/script/remove-mitigate-admin-rate-mongo.sh new file mode 100644 index 0000000..dc8d6a2 --- /dev/null +++ b/sregym/generators/fault/script/remove-mitigate-admin-rate-mongo.sh @@ -0,0 +1,26 @@ +#!/bin/bash + + +ROOT_USER="root" +ROOT_PWD="root" + +ADMIN_USER="admin" +ADMIN_PWD="admin" +READ_WRITE_ROLE="readWrite" +TARGET_DB="rate-db" +# TARGET_DB="geo-db" + +echo "Recreating admin user..." + +# Connect to MongoDB and create the admin user +mongo admin -u $ROOT_USER -p $ROOT_PWD --authenticationDatabase admin \ + --eval "db.createUser({user: '$ADMIN_USER', pwd: '$ADMIN_PWD', roles:[{role:'userAdminAnyDatabase',db:'admin'}]});" + +echo "Admin user recreated" + +# Grant readWrite role on the target database +echo "Granting readWrite role to $ADMIN_USER on $TARGET_DB database..." 
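+# Same flow as the geo mitigation script above; only TARGET_DB differs (rate-db).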
+mongo admin -u $ROOT_USER -p $ROOT_PWD --authenticationDatabase admin \ + --eval "db.grantRolesToUser('$ADMIN_USER', [{role: '$READ_WRITE_ROLE', db: '$TARGET_DB'}]);" + +echo "Privileges restored successfully" diff --git a/sregym/generators/fault/script/revoke-admin-geo-mongo.sh b/sregym/generators/fault/script/revoke-admin-geo-mongo.sh new file mode 100644 index 0000000..c8cb0b0 --- /dev/null +++ b/sregym/generators/fault/script/revoke-admin-geo-mongo.sh @@ -0,0 +1,15 @@ +#!/bin/bash + + +ADMIN_USER="admin" +ADMIN_PWD="admin" +# TARGET_DB="rate-db" +TARGET_DB="geo-db" + +echo "Downgrading admin user privileges..." + +# Connect to MongoDB and revoke roles +mongo admin -u $ADMIN_USER -p $ADMIN_PWD --authenticationDatabase admin \ + --eval "db.revokeRolesFromUser('$ADMIN_USER', [{role: 'readWrite', db: '$TARGET_DB'}]);" + +echo "Privileges downgraded" diff --git a/sregym/generators/fault/script/revoke-admin-rate-mongo.sh b/sregym/generators/fault/script/revoke-admin-rate-mongo.sh new file mode 100644 index 0000000..3887204 --- /dev/null +++ b/sregym/generators/fault/script/revoke-admin-rate-mongo.sh @@ -0,0 +1,15 @@ +#!/bin/bash + + +ADMIN_USER="admin" +ADMIN_PWD="admin" +TARGET_DB="rate-db" +# TARGET_DB="geo-db" + +echo "Downgrading admin user privileges..." + +# Connect to MongoDB and revoke roles +mongo admin -u $ADMIN_USER -p $ADMIN_PWD --authenticationDatabase admin \ + --eval "db.revokeRolesFromUser('$ADMIN_USER', [{role: 'readWrite', db: '$TARGET_DB'}]);" + +echo "Privileges downgraded" diff --git a/sregym/generators/fault/script/revoke-mitigate-admin-geo-mongo.sh b/sregym/generators/fault/script/revoke-mitigate-admin-geo-mongo.sh new file mode 100644 index 0000000..b5cd6fb --- /dev/null +++ b/sregym/generators/fault/script/revoke-mitigate-admin-geo-mongo.sh @@ -0,0 +1,17 @@ +#!/bin/bash + + +ADMIN_USER="admin" +ADMIN_PWD="admin" + +# TARGET_DB="rate-db" +TARGET_DB="geo-db" +READ_WRITE_ROLE="readWrite" + +echo "Restoring readWrite privilege to the $ADMIN_USER user for the $TARGET_DB database..." + +# Grant readWrite role on the target database +mongo admin -u $ADMIN_USER -p $ADMIN_PWD --authenticationDatabase admin \ + --eval "db.grantRolesToUser('$ADMIN_USER', [{role: '$READ_WRITE_ROLE', db: '$TARGET_DB'}]);" + +echo "Privilege restored successfully" \ No newline at end of file diff --git a/sregym/generators/fault/script/revoke-mitigate-admin-rate-mongo.sh b/sregym/generators/fault/script/revoke-mitigate-admin-rate-mongo.sh new file mode 100644 index 0000000..f1fa99d --- /dev/null +++ b/sregym/generators/fault/script/revoke-mitigate-admin-rate-mongo.sh @@ -0,0 +1,17 @@ +#!/bin/bash + + +ADMIN_USER="admin" +ADMIN_PWD="admin" + +TARGET_DB="rate-db" +# TARGET_DB="geo-db" +READ_WRITE_ROLE="readWrite" + +echo "Restoring readWrite privilege to the $ADMIN_USER user for the $TARGET_DB database..." 
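+# Mitigation re-grants exactly the role that the matching revoke-admin-rate-mongo.sh
+# script removed.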
+ +# Grant readWrite role on the target database +mongo admin -u $ADMIN_USER -p $ADMIN_PWD --authenticationDatabase admin \ + --eval "db.grantRolesToUser('$ADMIN_USER', [{role: '$READ_WRITE_ROLE', db: '$TARGET_DB'}]);" + +echo "Privilege restored successfully" \ No newline at end of file diff --git a/sregym/generators/noise/transient_issues/chaos_injector.py b/sregym/generators/noise/transient_issues/chaos_injector.py new file mode 100644 index 0000000..1b8db0b --- /dev/null +++ b/sregym/generators/noise/transient_issues/chaos_injector.py @@ -0,0 +1,73 @@ +import os +from typing import List + +import yaml +from kubernetes import client + +from sregym.service.helm import Helm +from sregym.service.kubectl import KubeCtl + + +class ChaosInjector: + def __init__(self, namespace: str): + self.namespace = namespace + self.kubectl = KubeCtl() + self.kubectl.create_namespace_if_not_exist("chaos-mesh") + Helm.add_repo("chaos-mesh", "https://charts.chaos-mesh.org") + chaos_configs = { + "release_name": "chaos-mesh", + "chart_path": "chaos-mesh/chaos-mesh", + "namespace": "chaos-mesh", + "version": "2.6.2", + } + + container_runtime = self.kubectl.get_container_runtime() + + if "docker" in container_runtime: + pass + elif "containerd" in container_runtime: + chaos_configs["extra_args"] = [ + "--set chaosDaemon.runtime=containerd", + "--set chaosDaemon.socketPath=/run/containerd/containerd.sock", + ] + else: + raise ValueError(f"Unsupported container runtime: {container_runtime}") + + # Disable security for the dashboard + if chaos_configs.get("extra_args"): + chaos_configs["extra_args"].append("--set dashboard.securityMode=false") + else: + chaos_configs["extra_args"] = ["--set dashboard.securityMode=false"] + + # Check if the release already exists + release_exists = Helm.exists_release(chaos_configs["release_name"], chaos_configs["namespace"]) + if not release_exists: + Helm.install(**chaos_configs) + self.kubectl.wait_for_ready("chaos-mesh") + else: + print( + f"[ChaosInjector] Helm release '{chaos_configs['release_name']}' already exists in namespace '{chaos_configs['namespace']}', skipping install." 
+ ) + + def create_chaos_experiment(self, experiment_yaml: dict, experiment_name: str): + try: + chaos_yaml_path = f"/tmp/{experiment_name}.yaml" + with open(chaos_yaml_path, "w") as file: + yaml.dump(experiment_yaml, file) + command = f"kubectl apply -f {chaos_yaml_path}" + result = self.kubectl.exec_command(command) + print(f"Applied {experiment_name} chaos experiment: {result}") + except Exception as e: + raise RuntimeError(f"Error applying chaos experiment: {e}") from e + + def delete_chaos_experiment(self, experiment_name: str): + try: + chaos_yaml_path = f"/tmp/{experiment_name}.yaml" + command = f"kubectl delete -f {chaos_yaml_path}" + result = self.kubectl.exec_command(command) + print(f"Cleaned up chaos experiment: {result}") + except Exception as e: + chaos_yaml_path = f"/tmp/{experiment_name}.yaml" + command = f"kubectl delete -f {chaos_yaml_path} --force --grace-period=0" + result = self.kubectl.exec_command(command) + raise RuntimeError(f"Error cleaning up chaos experiment: {e}") from e diff --git a/sregym/generators/noise/transient_issues/configuration.yml b/sregym/generators/noise/transient_issues/configuration.yml new file mode 100644 index 0000000..b236863 --- /dev/null +++ b/sregym/generators/noise/transient_issues/configuration.yml @@ -0,0 +1,10 @@ +switch: False +min_duration: 40 +max_duration: 60 +fault_types: + - FAIL_SLOW + - FAIL_STOP +scopes: + - TARGET_NAMESPACE +interval_min: 20 +interval_max: 30 \ No newline at end of file diff --git a/sregym/generators/noise/transient_issues/transient_issues.py b/sregym/generators/noise/transient_issues/transient_issues.py new file mode 100644 index 0000000..e9903eb --- /dev/null +++ b/sregym/generators/noise/transient_issues/transient_issues.py @@ -0,0 +1,1094 @@ +import random +import time +import threading +from typing import List, Dict, Optional, Set +from dataclasses import dataclass +from enum import Enum +from kubernetes import client +from sregym.generators.noise.transient_issues.chaos_injector import ChaosInjector +from sregym.service.kubectl import KubeCtl + + +class FaultType(Enum): + """Fault type enumeration""" + FAIL_STOP = "fail-stop" # Stop-type faults + FAIL_SLOW = "fail-slow" # Slow-down type faults + + +class PodScope(Enum): + """Pod scope level enumeration""" + TARGET_SERVICE = "target_service" # Within target service + TARGET_NAMESPACE = "target_namespace" # Within target namespace + NON_TARGET_SERVICE = "non_target_service" # Outside target service but within target namespace + ALL_PODS = "all_pods" # All pods + NON_TARGET_NAMESPACE = "non_target_namespace" # All pods outside target namespace + + +@dataclass +class TransientExperiment: + """Represents a transient chaos experiment""" + name: str + experiment_type: str + fault_type: FaultType + target_pods: List[str] + scope: PodScope + duration: int # Duration in seconds + start_time: float + cleanup_timer: Optional[threading.Timer] = None + + +class TransientIssuesGenerator: + """Generate transient cluster issues as noise""" + + def __init__(self, namespace: str, target_services: List[str] = None, + min_duration: int = 30, max_duration: int = 300): + """ + Initialize transient issues generator + + Args: + namespace: Target namespace + target_services: List of target services, used to determine scope + min_duration: Minimum duration in seconds + max_duration: Maximum duration in seconds + """ + self.namespace = namespace + self.target_services = target_services or [] + self.min_duration = min_duration + self.max_duration = max_duration + self.kubectl = 
KubeCtl() + self.chaos_injector = ChaosInjector(namespace) + + # Active transient experiments + self.active_experiments: Dict[str, TransientExperiment] = {} + + # Continuous injection control + self._injection_running = False + self._injection_thread = None + self._stop_event = threading.Event() + + # Available chaos experiment types and their configurations + self.experiment_types = { + # Fail-Stop type faults + "pod-kill": { + "weight": 3, + "fault_type": FaultType.FAIL_STOP, + "method": self._inject_pod_kill, + "cleanup_method": self._cleanup_pod_kill, + "description": "Randomly kill pods", + "scopes": [PodScope.TARGET_SERVICE, PodScope.TARGET_NAMESPACE, + PodScope.NON_TARGET_SERVICE, PodScope.ALL_PODS, PodScope.NON_TARGET_NAMESPACE] + }, + "container-kill": { + "weight": 2, + "fault_type": FaultType.FAIL_STOP, + "method": self._inject_container_kill, + "cleanup_method": self._cleanup_container_kill, + "description": "Kill containers", + "scopes": [PodScope.TARGET_SERVICE, PodScope.TARGET_NAMESPACE, + PodScope.NON_TARGET_SERVICE, PodScope.ALL_PODS, PodScope.NON_TARGET_NAMESPACE] + }, + "pod-failure": { + "weight": 2, + "fault_type": FaultType.FAIL_STOP, + "method": self._inject_pod_failure, + "cleanup_method": self._cleanup_pod_failure, + "description": "Inject pod failure", + "scopes": [PodScope.TARGET_SERVICE, PodScope.TARGET_NAMESPACE, + PodScope.NON_TARGET_SERVICE, PodScope.ALL_PODS, PodScope.NON_TARGET_NAMESPACE] + }, + "network-loss": { + "weight": 1, + "fault_type": FaultType.FAIL_STOP, + "method": self._inject_network_loss, + "cleanup_method": self._cleanup_network_loss, + "description": "Inject network packet loss", + "scopes": [PodScope.TARGET_SERVICE, PodScope.TARGET_NAMESPACE, + PodScope.NON_TARGET_SERVICE, PodScope.ALL_PODS, PodScope.NON_TARGET_NAMESPACE] + }, + "network-partition": { + "weight": 1, + "fault_type": FaultType.FAIL_STOP, + "method": self._inject_network_partition, + "cleanup_method": self._cleanup_network_partition, + "description": "Inject network partition between services", + "scopes": [PodScope.TARGET_SERVICE, PodScope.TARGET_NAMESPACE, + PodScope.NON_TARGET_SERVICE, PodScope.ALL_PODS, PodScope.NON_TARGET_NAMESPACE] + }, + + # Fail-Slow type faults + "network-delay": { + "weight": 2, + "fault_type": FaultType.FAIL_SLOW, + "method": self._inject_network_delay, + "cleanup_method": self._cleanup_network_delay, + "description": "Inject network delay", + "scopes": [PodScope.TARGET_SERVICE, PodScope.TARGET_NAMESPACE, + PodScope.NON_TARGET_SERVICE, PodScope.ALL_PODS, PodScope.NON_TARGET_NAMESPACE] + }, + "cpu-stress": { + "weight": 2, + "fault_type": FaultType.FAIL_SLOW, + "method": self._inject_cpu_stress, + "cleanup_method": self._cleanup_cpu_stress, + "description": "Inject CPU stress", + "scopes": [PodScope.TARGET_SERVICE, PodScope.TARGET_NAMESPACE, + PodScope.NON_TARGET_SERVICE, PodScope.ALL_PODS, PodScope.NON_TARGET_NAMESPACE] + }, + "memory-stress": { + "weight": 1, + "fault_type": FaultType.FAIL_SLOW, + "method": self._inject_memory_stress, + "cleanup_method": self._cleanup_memory_stress, + "description": "Inject memory stress", + "scopes": [PodScope.TARGET_SERVICE, PodScope.TARGET_NAMESPACE, + PodScope.NON_TARGET_SERVICE, PodScope.ALL_PODS, PodScope.NON_TARGET_NAMESPACE] + } + } + self.cleanup_all_experiments() + + + ### Injection and cleanup + def inject_transient_issue(self, fault_types: List[FaultType] = None, + scopes: List[PodScope] = None, experiment: str = None) -> Optional[str]: + """Inject a transient issue""" + try: + # Randomly select 
experiment type + experiment_type = self.select_random_experiment_type(fault_types, scopes) + if experiment: + experiment_type = experiment + if not experiment_type: + return None + + # Randomly select scope + if scopes: + available_scopes = [s for s in scopes if s in self.experiment_types[experiment_type]["scopes"]] + else: + available_scopes = self.experiment_types[experiment_type]["scopes"] + + if not available_scopes: + print("No available scopes") + return None + + selected_scope = random.choice(available_scopes) + # Get target pods based on scope + count = 2 if experiment_type in ["network-partition"] else 1 + target_pods = self.get_pods_by_scope(selected_scope, count) + if not target_pods: + print("No available target pods, skipping injection") + return None + + # Randomly generate duration + duration = random.randint(self.min_duration, self.max_duration) + + # Generate experiment name + experiment_name = self.generate_experiment_name(experiment_type) + + fault_type = self.experiment_types[experiment_type]["fault_type"] + + print(f"🔥 Injecting transient issue: {experiment_type}") + print(f" Experiment name: {experiment_name}") + print(f" Fault type: {fault_type.value}") + print(f" Scope level: {selected_scope.value}") + print(f" Target services: {target_pods}") + print(f" Duration: {duration} seconds") + print(f" Description: {self.experiment_types[experiment_type]['description']}") + + # Create experiment record + experiment = TransientExperiment( + name=experiment_name, + experiment_type=experiment_type, + fault_type=fault_type, + target_pods=target_pods, + scope=selected_scope, + duration=duration, + start_time=time.time() + ) + + # Execute injection + inject_method = self.experiment_types[experiment_type]["method"] + success = inject_method(experiment_name, target_pods) + + if success: + # Set auto cleanup timer + cleanup_timer = threading.Timer( + duration, + self._auto_cleanup, + args=[experiment_name] + ) + experiment.cleanup_timer = cleanup_timer + cleanup_timer.start() + + # Record active experiment + self.active_experiments[experiment_name] = experiment + + print(f"✅ Transient issue injection successful, will auto recover in {duration} seconds") + return experiment_name + else: + print(f"❌ Transient issue injection failed") + return None + + except Exception as e: + raise RuntimeError(f"Error injecting transient issue: {e}") from e + + def _auto_cleanup(self, experiment_name: str): + """Automatically cleanup experiment""" + try: + if experiment_name in self.active_experiments: + experiment = self.active_experiments[experiment_name] + elapsed = time.time() - experiment.start_time + + print(f"🔄 Auto recovering transient issue: {experiment.experiment_type}") + print(f" Experiment name: {experiment_name}") + print(f" Fault type: {experiment.fault_type.value}") + print(f" Scope level: {experiment.scope.value}") + print(f" Actual duration: {elapsed:.1f} seconds") + + # Execute cleanup + cleanup_method = self.experiment_types[experiment.experiment_type]["cleanup_method"] + cleanup_method(experiment_name) + # Remove record + del self.active_experiments[experiment_name] + + print(f"✅ Transient issue auto recovered") + + except Exception as e: + raise RuntimeError(f"Error auto cleaning experiment: {e}") from e + + def cleanup_experiment(self, experiment_name: str) -> bool: + """Manually cleanup specified experiment""" + try: + if experiment_name not in self.active_experiments: + print(f"Experiment {experiment_name} does not exist or has been cleaned up") + return False + + 
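+ # Look up the record and cancel its pending auto-cleanup timer first, so the scheduled
+ # _auto_cleanup callback cannot race with this manual cleanup.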
experiment = self.active_experiments[experiment_name] + + # Cancel timer + if experiment.cleanup_timer: + experiment.cleanup_timer.cancel() + + # Execute cleanup + cleanup_method = self.experiment_types[experiment.experiment_type]["cleanup_method"] + cleanup_method(experiment_name) + + # Remove record + del self.active_experiments[experiment_name] + + print(f"✅ Manual cleanup of experiment {experiment_name} successful") + return True + + except Exception as e: + raise RuntimeError(f"Error manually cleaning experiment: {e}") from e + + def cleanup_all_experiments(self): + """Cleanup all active experiments""" + experiment_names = list(self.active_experiments.keys()) + for name in experiment_names: + self.cleanup_experiment(name) + experiment_types=["PodChaos", "NetworkChaos", "StressChaos"] + for experiment_type in experiment_types: + delete_cmd = f"kubectl delete {experiment_type} --all -n chaos-mesh --ignore-not-found=true" + self.kubectl.exec_command(delete_cmd) + + + def start_continuous_injection(self, fault_types: List[FaultType] = None, + scopes: List[PodScope] = None, + interval_min: int = 60, interval_max: int = 300): + """Start continuous transient issue injection + + Args: + fault_types: List of allowed fault types, None means all types + scopes: List of allowed scope levels, None means all scopes + interval_min: Minimum injection interval in seconds + interval_max: Maximum injection interval in seconds + """ + if self._injection_running: + print("⚠️ Continuous injection already running, please stop current injection first") + return False + + def injection_loop(): + print(f"🚀 Continuous transient issue injection started") + print(f" Allowed fault types: {[ft.value for ft in fault_types] if fault_types else 'All types'}") + print(f" Allowed scope levels: {[s.value for s in scopes] if scopes else 'All scopes'}") + print(f" Injection interval: {interval_min}-{interval_max} seconds") + + while not self._stop_event.is_set(): + try: + # Inject a transient issue + self.inject_transient_issue(fault_types, scopes) + + # Wait randomly for next injection, but check stop signal periodically + next_interval = random.randint(interval_min, interval_max) + print(f"⏰ Next injection will be in {next_interval} seconds") + + # Wait in segments, checking stop signal every second + for _ in range(next_interval): + if self._stop_event.is_set(): + break + time.sleep(1) + + except Exception as e: + # For continuous injection loops, still use print to log errors but continue running + print(f"Error in continuous injection loop: {e}") + self.cleanup_all_experiments() + + print("🛑 Continuous transient issue injection stopped") + self._injection_running = False + + # Reset stop event + self._stop_event.clear() + + # Run in background thread + self._injection_thread = threading.Thread(target=injection_loop, daemon=True) + self._injection_running = True + self._injection_thread.start() + + return True + + def stop_continuous_injection(self, cleanup_active: bool = True) -> bool: + """Stop continuous transient issue injection + + Args: + cleanup_active: Whether to cleanup currently active experiments, default True + + Returns: + bool: Whether successfully stopped + """ + if not self._injection_running: + print("⚠️ Continuous injection not running") + return False + + print("🛑 Stopping continuous transient issue injection...") + + # Set stop signal + self._stop_event.set() + + # Wait for injection thread to end + if self._injection_thread and self._injection_thread.is_alive(): + 
self._injection_thread.join(timeout=10) # Wait at most 10 seconds + + if self._injection_thread.is_alive(): + print("⚠️ Injection thread failed to stop within 10 seconds") + return False + + # Optional: cleanup currently active experiments + if cleanup_active and self.active_experiments: + print(f"🧹 Cleaning up {len(self.active_experiments)} active experiments...") + self.cleanup_all_experiments() + + self._injection_running = False + print("✅ Continuous transient issue injection successfully stopped") + return True + + def restart_continuous_injection(self, fault_types: List[FaultType] = None, + scopes: List[PodScope] = None, + interval_min: int = 60, interval_max: int = 300, + cleanup_active: bool = False) -> bool: + """Restart continuous injection + + Args: + fault_types: List of allowed fault types + scopes: List of allowed scope levels + interval_min: Minimum injection interval in seconds + interval_max: Maximum injection interval in seconds + cleanup_active: Whether to cleanup active experiments before restart + + Returns: + bool: Whether successfully restarted + """ + print("🔄 Restarting continuous transient issue injection...") + + # First stop current injection + if self._injection_running: + if not self.stop_continuous_injection(cleanup_active): + return False + + # Start new injection + return self.start_continuous_injection(fault_types, scopes, interval_min, interval_max) + + + ### Status and statistics + def get_active_experiments(self) -> List[Dict]: + """Get information of currently active experiments""" + result = [] + current_time = time.time() + + for name, experiment in self.active_experiments.items(): + elapsed = current_time - experiment.start_time + remaining = max(0, experiment.duration - elapsed) + + result.append({ + "name": name, + "type": experiment.experiment_type, + "fault_type": experiment.fault_type.value, + "scope": experiment.scope.value, + "target_pods": experiment.target_pods, + "duration": experiment.duration, + "elapsed": round(elapsed, 1), + "remaining": round(remaining, 1), + "description": self.experiment_types[experiment.experiment_type]["description"] + }) + + return result + + def get_statistics(self) -> Dict: + """Get injection statistics""" + stats = { + "total_active": len(self.active_experiments), + "by_fault_type": {}, + "by_scope": {}, + "by_experiment_type": {} + } + + for experiment in self.active_experiments.values(): + # Statistics by fault type + fault_type = experiment.fault_type.value + stats["by_fault_type"][fault_type] = stats["by_fault_type"].get(fault_type, 0) + 1 + + # Statistics by scope + scope = experiment.scope.value + stats["by_scope"][scope] = stats["by_scope"].get(scope, 0) + 1 + + # Statistics by experiment type + exp_type = experiment.experiment_type + stats["by_experiment_type"][exp_type] = stats["by_experiment_type"].get(exp_type, 0) + 1 + + return stats + + + ### Specific injection methods & cleanup implementations + def _inject_pod_kill(self, experiment_name: str, target_pods: List[Dict[str,str]]) -> bool: + """Inject pod kill fault""" + try: + # target_namespace = self.namespace if scope != PodScope.NON_TARGET_NAMESPACE else "default" + + # Get actual labels of target pods + target_pod = random.choice(target_pods) + target_namespace = target_pod['namespace'] + target_label = target_pod['service_label'] + label_selector = self._get_pod_label_selector(target_label, target_namespace) + + if not label_selector: + print(f"Cannot find suitable label selector for pod {target_pod}") + return False + + chaos_experiment = { + 
"apiVersion": "chaos-mesh.org/v1alpha1", + "kind": "PodChaos", + "metadata": {"name": experiment_name, "namespace": 'chaos-mesh'}, + "spec": { + "action": "pod-kill", + "mode": "one", + "selector": { + "namespaces": [target_namespace], + "labelSelectors": label_selector + } + } + } + self.chaos_injector.create_chaos_experiment(chaos_experiment, experiment_name) + return True + except Exception as e: + raise RuntimeError(f"Failed to inject pod kill: {e}") from e + + def _cleanup_pod_kill(self, experiment_name: str): + """Cleanup pod kill fault""" + try: + self.chaos_injector.delete_chaos_experiment(experiment_name) + except Exception as e: + raise RuntimeError(f"Failed to cleanup pod kill experiment {experiment_name}: {e}") from e + + def _inject_network_delay(self, experiment_name: str, target_pods: List[str]) -> bool: + """Inject network delay fault""" + try: + latencies = ["100ms", "200ms", "500ms", "1s", "2s"] + latency = random.choice(latencies) + # target_namespace = self.namespace if scope != PodScope.NON_TARGET_NAMESPACE else "default" + + + target_pod = random.choice(target_pods) + target_namespace = target_pod['namespace'] + target_label = target_pod['service_label'] + label_selector = self._get_pod_label_selector(target_label, target_namespace) + + if not label_selector: + print(f"Cannot find suitable label selector for pod {target_pod}") + return False + + chaos_experiment = { + "apiVersion": "chaos-mesh.org/v1alpha1", + "kind": "NetworkChaos", + "metadata": {"name": experiment_name, "namespace": 'chaos-mesh'}, + "spec": { + "action": "delay", + "mode": "one", + "selector": { + "namespaces": [target_namespace], + "labelSelectors": label_selector + }, + "delay": {"latency": latency, "correlation": "100", "jitter": "0ms"} + } + } + self.chaos_injector.create_chaos_experiment(chaos_experiment, experiment_name) + return True + except Exception as e: + raise RuntimeError(f"Failed to inject network delay: {e}") from e + + def _cleanup_network_delay(self, experiment_name: str): + """Cleanup network delay fault""" + try: + self.chaos_injector.delete_chaos_experiment(experiment_name) + except Exception as e: + raise RuntimeError(f"Failed to cleanup network delay experiment {experiment_name}: {e}") from e + + def _inject_cpu_stress(self, experiment_name: str, target_pods: List[str]) -> bool: + """Inject CPU stress fault""" + try: + target_pod = random.choice(target_pods) + target_namespace = target_pod['namespace'] + target_label = target_pod['service_label'] + + label_selector = self._get_pod_label_selector(target_label, target_namespace) + + if not label_selector: + print(f"Cannot find suitable label selector for pod {target_pod}") + return False + + chaos_experiment = { + "apiVersion": "chaos-mesh.org/v1alpha1", + "kind": "StressChaos", + "metadata": {"name": experiment_name, "namespace": 'chaos-mesh'}, + "spec": { + "mode": "one", + "selector": { + "namespaces": [target_namespace], + "labelSelectors": label_selector + }, + "stressors": { + "cpu": { + "workers": random.randint(1, 4), + "load": random.randint(50, 100) + } + } + } + } + self.chaos_injector.create_chaos_experiment(chaos_experiment, experiment_name) + return True + except Exception as e: + raise RuntimeError(f"Failed to inject CPU stress: {e}") from e + + def _cleanup_cpu_stress(self, experiment_name: str): + """Cleanup CPU stress fault""" + try: + self.chaos_injector.delete_chaos_experiment(experiment_name) + except Exception as e: + raise RuntimeError(f"Failed to cleanup CPU stress experiment {experiment_name}: {e}") 
from e + + def _inject_memory_stress(self, experiment_name: str, target_pods: List[str]) -> bool: + """Inject memory stress fault""" + try: + target_pod = random.choice(target_pods) + memory_sizes = ["50%", "70%", "80%"] + memory_size = random.choice(memory_sizes) + # target_namespace = self.namespace if scope != PodScope.NON_TARGET_NAMESPACE else "default" + target_namespace = target_pod['namespace'] + target_label = target_pod['service_label'] + + label_selector = self._get_pod_label_selector(target_label, target_namespace) + + if not label_selector: + print(f"Cannot find suitable label selector for pod {target_pod}") + return False + + chaos_experiment = { + "apiVersion": "chaos-mesh.org/v1alpha1", + "kind": "StressChaos", + "metadata": {"name": experiment_name, "namespace": 'chaos-mesh'}, + "spec": { + "mode": "one", + "selector": { + "namespaces": [target_namespace], + "labelSelectors": label_selector + }, + "stressors": { + "memory": { + "workers": random.randint(1, 4), + "size": memory_size + } + } + } + } + self.chaos_injector.create_chaos_experiment(chaos_experiment, experiment_name) + return True + except Exception as e: + raise RuntimeError(f"Failed to inject memory stress: {e}") from e + + def _cleanup_memory_stress(self, experiment_name: str): + """Cleanup memory stress fault""" + try: + self.chaos_injector.delete_chaos_experiment(experiment_name) + except Exception as e: + raise RuntimeError(f"Failed to cleanup memory stress experiment {experiment_name}: {e}") from e + + def _inject_network_loss(self, experiment_name: str, target_pods: List[str]) -> bool: + """Inject network packet loss fault""" + try: + loss_rates = ["10", "20", "30", "50"] + loss_rate = random.choice(loss_rates) + # target_namespace = self.namespace if scope != PodScope.NON_TARGET_NAMESPACE else "default" + + target_pod = random.choice(target_pods) + target_namespace = target_pod['namespace'] + target_label = target_pod['service_label'] + label_selector = self._get_pod_label_selector(target_label, target_namespace) + + if not label_selector: + print(f"Cannot find suitable label selector for pod {target_pod}") + return False + + chaos_experiment = { + "apiVersion": "chaos-mesh.org/v1alpha1", + "kind": "NetworkChaos", + "metadata": {"name": experiment_name, "namespace": 'chaos-mesh'}, + "spec": { + "action": "loss", + "mode": "one", + "selector": { + "namespaces": [target_namespace], + "labelSelectors": label_selector + }, + "loss": {"loss": loss_rate, "correlation": "100"} + } + } + self.chaos_injector.create_chaos_experiment(chaos_experiment, experiment_name) + return True + except Exception as e: + raise RuntimeError(f"Failed to inject network loss: {e}") from e + + def _cleanup_network_loss(self, experiment_name: str): + """Cleanup network packet loss fault""" + try: + self.chaos_injector.delete_chaos_experiment(experiment_name) + except Exception as e: + raise RuntimeError(f"Failed to cleanup network loss experiment {experiment_name}: {e}") from e + + def _inject_container_kill(self, experiment_name: str, target_pods: List[str]) -> bool: + """Inject container kill fault""" + try: + # target_namespace = self.namespace if scope != PodScope.NON_TARGET_NAMESPACE else "default" + + target_pod = random.choice(target_pods) + target_namespace = target_pod['namespace'] + target_label = target_pod['service_label'] + label_selector = self._get_pod_label_selector(target_label, target_namespace) + + if not label_selector: + print(f"Cannot find suitable label selector for pod {target_pod}") + return False + + # Get 
container names + container_names = self._get_container_names(target_label, target_namespace) + if not container_names: + print(f"Cannot get container names for service {target_pod}, skipping container kill fault") + return False + + # Randomly select containers to kill + if len(container_names) == 1: + # Only one container, select it directly + selected_containers = container_names + else: + # Multiple containers, randomly decide how many to kill + num_to_kill = random.randint(1, len(container_names)) + selected_containers = random.sample(container_names, num_to_kill) + + print(f"Will kill containers: {selected_containers}") + + chaos_experiment = { + "apiVersion": "chaos-mesh.org/v1alpha1", + "kind": "PodChaos", + "metadata": {"name": experiment_name, "namespace": 'chaos-mesh'}, + "spec": { + "action": "container-kill", + "mode": "one", + "selector": { + "namespaces": [target_namespace], + "labelSelectors": label_selector + }, + "containerNames": selected_containers + } + } + self.chaos_injector.create_chaos_experiment(chaos_experiment, experiment_name) + return True + except Exception as e: + raise RuntimeError(f"Failed to inject container kill: {e}") from e + + def _cleanup_container_kill(self, experiment_name: str): + """Cleanup container kill fault""" + try: + self.chaos_injector.delete_chaos_experiment(experiment_name) + except Exception as e: + raise RuntimeError(f"Failed to cleanup container kill experiment {experiment_name}: {e}") from e + + def _inject_pod_failure(self, experiment_name: str, target_pods: List[str]) -> bool: + """Inject pod failure fault""" + try: + target_pod = random.choice(target_pods) + target_namespace = target_pod['namespace'] + target_label = target_pod['service_label'] + label_selector = self._get_pod_label_selector(target_label, target_namespace) + + if not label_selector: + print(f"Cannot find suitable label selector for pod {target_pod}") + return False + + chaos_experiment = { + "apiVersion": "chaos-mesh.org/v1alpha1", + "kind": "PodChaos", + "metadata": {"name": experiment_name, "namespace": 'chaos-mesh'}, + "spec": { + "action": "pod-failure", + "mode": "one", + "selector": { + "namespaces": [target_namespace], + "labelSelectors": label_selector + } + } + } + self.chaos_injector.create_chaos_experiment(chaos_experiment, experiment_name) + return True + except Exception as e: + raise RuntimeError(f"Failed to inject pod failure: {e}") from e + + def _cleanup_pod_failure(self, experiment_name: str): + """Cleanup pod failure fault""" + try: + self.chaos_injector.delete_chaos_experiment(experiment_name) + except Exception as e: + raise RuntimeError(f"Failed to cleanup pod failure experiment {experiment_name}: {e}") from e + + def _inject_network_partition(self, experiment_name: str, target_pods: List[str]) -> bool: + """Inject network partition fault""" + try: + # For network partition, we need at least 2 different services + if len(target_pods) < 2: + print("Network partition requires target pods, skipping injection") + return False + + from_pod = target_pods[0] + to_pod = target_pods[1] + + from_namespace = from_pod['namespace'] + to_namespace = to_pod['namespace'] + from_service = from_pod['service_label'] + to_service = to_pod['service_label'] + + # Get label selectors for both services + from_label_selector = self._get_pod_label_selector(from_service, from_namespace) + to_label_selector = self._get_pod_label_selector(to_service, to_namespace) + + if not from_label_selector or not to_label_selector: + print(f"Cannot find suitable label selectors for 
partition: {from_service} -> {to_service}") + return False + + chaos_experiment = { + "apiVersion": "chaos-mesh.org/v1alpha1", + "kind": "NetworkChaos", + "metadata": {"name": experiment_name, "namespace": 'chaos-mesh'}, + "spec": { + "action": "partition", + "mode": "all", + "selector": { + "namespaces": [from_namespace], + "labelSelectors": from_label_selector + }, + "direction": "to", + "target": { + "mode": "all", + "selector": { + "namespaces": [to_namespace], + "labelSelectors": to_label_selector + } + } + } + } + + print(f"Creating network partition: {from_service}@{from_namespace} -> {to_service}@{to_namespace}") + self.chaos_injector.create_chaos_experiment(chaos_experiment, experiment_name) + return True + + except Exception as e: + raise RuntimeError(f"Failed to inject network partition: {e}") from e + + def _cleanup_network_partition(self, experiment_name: str): + """Cleanup network partition fault""" + try: + self.chaos_injector.delete_chaos_experiment(experiment_name) + except Exception as e: + raise RuntimeError(f"Failed to cleanup network partition experiment {experiment_name}: {e}") from e + + + ### help methods + def get_pods_by_scope(self, scope: PodScope, count: int) -> List[Dict[str, str]]: + """Get pod list based on scope level""" + try: + v1 = client.CoreV1Api() + all_pods = [] + + # Step 1: Determine target namespace list based on PodScope + target_namespaces = self._get_target_namespaces(scope) + if not target_namespaces: + print(f"No suitable namespace found in scope {scope.value}") + return [] + # Step 2: Collect qualifying pods in target namespace + for target_namespace in target_namespaces: + pods = v1.list_namespaced_pod(namespace=target_namespace) + + for pod in pods.items: + labels = pod.metadata.labels or {} + service_name = None + + # Extract service name + if "app.kubernetes.io/component" in labels: + service_name = labels["app.kubernetes.io/component"] + elif "app.kubernetes.io/name" in labels: + service_name = labels["app.kubernetes.io/name"] + elif "io.kompose.service" in labels: + service_name = labels["io.kompose.service"] + elif "openebs.io/component-name" in labels: + service_name = labels["openebs.io/component-name"] + elif "service" in labels: + service_name = labels["service"] + elif "app" in labels: + service_name = labels["app"] + + if service_name: + # Decide whether to include this pod based on scope + if self._should_include_pod(service_name, target_namespace, scope): + all_pods.append({ + 'service_label': service_name, + 'namespace': target_namespace + }) + + # Step 3: Deduplication + unique_pods = self._deduplicate_pods(all_pods) + + if not unique_pods: + print(f"No suitable pods found in scope {scope.value}") + return [] + + # Step 4: Random selection + selected = random.sample(unique_pods, min(count, len(unique_pods))) + return selected + + except Exception as e: + raise RuntimeError(f"Error getting pod list for scope {scope.value}: {e}") from e + + def _get_target_namespaces(self, scope: PodScope) -> List[str]: + """Get target namespace list based on PodScope""" + try: + v1 = client.CoreV1Api() + target_namespaces = [] + if scope == PodScope.NON_TARGET_NAMESPACE: + # Get all namespaces (excluding target namespace and system namespaces) + namespaces = v1.list_namespace() + for ns in namespaces.items: + ns_name = ns.metadata.name + if (ns_name != self.namespace and + not ns_name.startswith('kube-') and + not ns_name.startswith('chaos-') and + not ns_name.startswith('khaos') and + ns_name != 'default'): + target_namespaces.append(ns_name) 
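+ # kube-*, chaos-*, khaos* and 'default' are all skipped, presumably so noise is never
+ # injected into system namespaces or the chaos tooling itself.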
+ # return target_namespaces + + elif scope == PodScope.ALL_PODS: + # Get all namespaces in cluster (excluding system namespaces) + namespaces = v1.list_namespace() + for ns in namespaces.items: + ns_name = ns.metadata.name + if (not ns_name.startswith('kube-') and + not ns_name.startswith('chaos-') and + not ns_name.startswith('khaos') and + ns_name != 'default'): + target_namespaces.append(ns_name) + # return target_namespaces + + else: + # TARGET_SERVICE, TARGET_NAMESPACE, NON_TARGET_SERVICE + # These are all limited to target namespace + target_namespaces.append(self.namespace) + + # select one namespace randomly + if target_namespaces: + return target_namespaces + else: + return [] + + except Exception as e: + raise RuntimeError(f"Error getting target namespaces: {e}") from e + + def _should_include_pod(self, service_name: str, namespace: str, scope: PodScope) -> bool: + """Determine whether to include specified pod based on scope""" + if scope == PodScope.TARGET_SERVICE: + # Only include target services, and must be within target namespace + return (namespace == self.namespace and + service_name in self.target_services) + + elif scope == PodScope.NON_TARGET_SERVICE: + # Exclude target services, but must be within target namespace + return (namespace == self.namespace and + service_name not in self.target_services) + + elif scope == PodScope.TARGET_NAMESPACE: + # Include all services within target namespace + return namespace == self.namespace + + elif scope in [PodScope.NON_TARGET_NAMESPACE, PodScope.ALL_PODS]: + # For cross-namespace scenarios, include all valid services + return True + + return False + + def _deduplicate_pods(self, all_pods: List[Dict[str, str]]) -> List[Dict[str, str]]: + """Deduplicate pods list""" + unique_pods = [] + seen = set() + + for pod_info in all_pods: + identifier = f"{pod_info['service_label']}:{pod_info['namespace']}" + if identifier not in seen: + seen.add(identifier) + unique_pods.append(pod_info) + + return unique_pods + + def select_random_experiment_type(self, fault_types: List[FaultType] = None, + scopes: List[PodScope] = None) -> Optional[str]: + """Randomly select experiment type based on weights and filter conditions""" + # Filter qualifying experiment types + available_types = [] + weights = [] + + for exp_type, config in self.experiment_types.items(): + # Check fault type filter + if fault_types and config["fault_type"] not in fault_types: + continue + + # Check scope filter + if scopes and not any(scope in config["scopes"] for scope in scopes): + continue + + available_types.append(exp_type) + weights.append(config["weight"]) + + if not available_types: + print("No qualifying experiment types") + return None + + return random.choices(available_types, weights=weights, k=1)[0] + + def generate_experiment_name(self, experiment_type: str) -> str: + """Generate unique experiment name""" + timestamp = int(time.time()) + random_suffix = random.randint(1000, 9999) + return f"transient-{experiment_type}-{timestamp}-{random_suffix}" + + def _get_pod_label_selector(self, target_service: str, namespace: str) -> Dict[str, str]: + """Get correct label selector based on service name""" + try: + v1 = client.CoreV1Api() + pods = v1.list_namespaced_pod(namespace=namespace) + + for pod in pods.items: + labels = pod.metadata.labels or {} + + # Check different label formats + if "app.kubernetes.io/component" in labels and labels["app.kubernetes.io/component"] == target_service: + return {"app.kubernetes.io/component": target_service} + elif 
"io.kompose.service" in labels and labels["io.kompose.service"] == target_service: + return {"io.kompose.service": target_service} + elif "openebs.io/component-name" in labels and labels["openebs.io/component-name"] == target_service: + return {"openebs.io/component-name": target_service} + elif "app.kubernetes.io/name" in labels and labels["app.kubernetes.io/name"] == target_service: + return {"app.kubernetes.io/name": target_service} + elif "app" in labels and labels["app"] == target_service: + return {"app": target_service} + elif "service" in labels and labels["service"] == target_service: + return {"service": target_service} + + print(f"Label selector not found for service {target_service}") + return {} + + except Exception as e: + raise RuntimeError(f"Error getting label selector: {e}") from e + + def _get_container_names(self, target_service: str, namespace: str) -> List[str]: + """Get container name list for specified service""" + try: + v1 = client.CoreV1Api() + pods = v1.list_namespaced_pod(namespace=namespace) + + for pod in pods.items: + labels = pod.metadata.labels or {} + + # Check if it's a pod of target service + service_match = False + if "app.kubernetes.io/component" in labels and labels["app.kubernetes.io/component"] == target_service: + service_match = True + elif "io.kompose.service" in labels and labels["io.kompose.service"] == target_service: + service_match = True + elif "app.kubernetes.io/name" in labels and labels["app.kubernetes.io/name"] == target_service: + service_match = True + elif "service" in labels and labels["service"] == target_service: + service_match = True + elif "openebs.io/component-name" in labels and labels["openebs.io/component-name"] == target_service: + service_match = True + elif "app" in labels and labels["app"] == target_service: + service_match = True + + if service_match and pod.spec.containers: + # Return all container names of first matching pod + container_names = [container.name for container in pod.spec.containers] + print(f"Found containers for service {target_service}: {container_names}") + return container_names + + print(f"Containers not found for service {target_service}") + return [] + + except Exception as e: + raise RuntimeError(f"Error getting container names: {e}") from e + +# Usage example +if __name__ == "__main__": + # Create generator + generator = TransientIssuesGenerator( + namespace="hotel-reservation", + target_services=["frontend"], # Specify target services + min_duration=30, # Minimum duration 30 seconds + max_duration=50 # Maximum duration 50 seconds + ) + + # # Example 1: Only inject fail-stop type faults, scope limited to target service + # generator.start_continuous_injection( + # fault_types=[FaultType.FAIL_STOP, FaultType.FAIL_SLOW], + # scopes=[PodScope.ALL_PODS], + # interval_min=5, + # interval_max=30 + # ) + # time.sleep(30) # Run for 30 seconds then stop + # Example 2: Manually inject a fail-slow type fault, scope within target namespace + experiment_name = generator.inject_transient_issue( + fault_types=[FaultType.FAIL_STOP], + scopes=[PodScope.ALL_PODS], + experiment="network-partition" + ) + + # Example 3: View active experiments and statistics + active = generator.get_active_experiments() + stats = generator.get_statistics() + print("Currently active experiments:", active) + print("Statistics:", stats) + + # generator.stop_continuous_injection() + + # Example 4: Cleanup all experiments (when program ends) + generator.cleanup_all_experiments() diff --git a/sregym/generators/workload/__init__.py 
b/sregym/generators/workload/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/sregym/generators/workload/base.py b/sregym/generators/workload/base.py new file mode 100644 index 0000000..5f6fe54 --- /dev/null +++ b/sregym/generators/workload/base.py @@ -0,0 +1,60 @@ +from abc import ABC, abstractmethod + +from pydantic.dataclasses import dataclass + +# Two types of workload generators: +# 1. Constantly running workload generator +# 2. Workload generator that runs for a fixed duration +# By repeating type 2, we can always assume it's constantly running. + +# Two purposes: +# 1. To generate traces +# 2. Validation + + +@dataclass +class WorkloadEntry: + time: float # Start time of the workload run + number: int # Number of requests generated in this workload run + log: str # Log of the workload run + ok: bool # Indicates if the workload was successful + + +class WorkloadManager(ABC): + """ + Constantly running workload generator. + """ + + def __init__(self): + super().__init__() + + @abstractmethod + def start(self, *args, **kwargs): + """ + Start the workload generator. + """ + pass + + @abstractmethod + def stop(self, *args, **kwargs): + """ + Stop the workload generator. + """ + pass + + @abstractmethod + def collect(self, number=100, since_seconds=None) -> list[WorkloadEntry]: + """ + Run the workload generator until collected data is sufficient. + - Number of requests should be at least `number` starting from `since_seconds` ago. + - If `since_seconds` is not provided, it should start from the current time. + - `since_seconds` is a relative time in seconds, not an absolute timestamp. + """ + pass + + @abstractmethod + def recent_entries(self, duration=30) -> list[WorkloadEntry]: + """ + Return recently collected data within the given duration (seconds). 
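+        - In the stream-based implementation (StreamWorkloadManager) the window is
+          measured relative to the newest collected entry's timestamp, not wall-clock time.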
+ """ + pass diff --git a/sregym/generators/workload/blueprint_hotel_work.py b/sregym/generators/workload/blueprint_hotel_work.py new file mode 100644 index 0000000..d365ca7 --- /dev/null +++ b/sregym/generators/workload/blueprint_hotel_work.py @@ -0,0 +1,303 @@ +import time +import threading +from datetime import datetime + +import yaml +from kubernetes import client, config +from rich.console import Console + +import logging +from sregym.generators.workload.base import WorkloadEntry +from sregym.generators.workload.stream import StreamWorkloadManager +from sregym.paths import TARGET_MICROSERVICES +from sregym.generators.noise.transient_issues.chaos_injector import ChaosInjector + +# Mimicked the Wrk2 class + +local_logger = logging.getLogger("all.infra.workload") +local_logger.propagate = True +local_logger.setLevel(logging.DEBUG) + +class BHotelWrk: + """ + Persistent workload generator + """ + + def __init__(self, tput: int, duration: str, multiplier: int): + self.tput = tput + self.duration = duration + self.multiplier = multiplier + + config.load_kube_config() + + def create_configmap(self, config_name, namespace): + api_instance = client.CoreV1Api() + bhotelwrk_job_configmap = TARGET_MICROSERVICES / "BlueprintHotelReservation" / "wlgen" / "wlgen_proc-configmap.yaml" + with open(bhotelwrk_job_configmap, 'r', encoding='utf-8') as f: + configmap_template = yaml.safe_load(f) + + configmap_template['data']['TPUT'] = str(self.tput) + configmap_template['data']['DURATION'] = self.duration + configmap_template['data']['MULTIPLIER'] = str(self.multiplier) + + try: + local_logger.info(f"Checking for existing ConfigMap '{config_name}'...") + api_instance.delete_namespaced_config_map(name=config_name, namespace=namespace) + local_logger.info(f"ConfigMap '{config_name}' deleted.") + except client.exceptions.ApiException as e: + if e.status != 404: + local_logger.error(f"Error deleting ConfigMap '{config_name}': {e}") + return + + try: + local_logger.info(f"Creating ConfigMap '{config_name}'...") + api_instance.create_namespaced_config_map(namespace=namespace, body=configmap_template) + local_logger.info(f"ConfigMap '{config_name}' created successfully.") + except client.exceptions.ApiException as e: + local_logger.error(f"Error creating ConfigMap '{config_name}': {e}") + + + def create_bhotelwrk_job(self, job_name, namespace): + bhotelwrk_job_yaml = TARGET_MICROSERVICES / "BlueprintHotelReservation" / "wlgen" / "wlgen_proc-job.yaml" + with open(bhotelwrk_job_yaml, "r") as f: + job_template = yaml.safe_load(f) + + api_instance = client.BatchV1Api() + try: + existing_job = api_instance.read_namespaced_job(name=job_name, namespace=namespace) + if existing_job: + local_logger.info(f"Job '{job_name}' already exists. 
Deleting it...") + api_instance.delete_namespaced_job( + name=job_name, + namespace=namespace, + body=client.V1DeleteOptions(propagation_policy="Foreground"), + ) + self.wait_for_job_deletion(job_name, namespace) + except client.exceptions.ApiException as e: + if e.status != 404: + local_logger.error(f"Error checking for existing job: {e}") + return + + try: + response = api_instance.create_namespaced_job(namespace=namespace, body=job_template) + local_logger.info(f"Job created: {response.metadata.name}") + except client.exceptions.ApiException as e: + local_logger.error(f"Error creating job: {e}") + + def start_workload(self, + namespace, + configmap_name = "bhotelwrk-wlgen-env", + job_name = "bhotelwrk-wlgen-job"): + + self.create_configmap(config_name=configmap_name, namespace=namespace) + + self.create_bhotelwrk_job(job_name=job_name, namespace=namespace) + + def stop_workload(self, namespace, job_name="bhotelwrk-wlgen-proc"): + + api_instance = client.BatchV1Api() + try: + existing_job = api_instance.read_namespaced_job(name=job_name, namespace=namespace) + if existing_job: + local_logger.info(f"Stopping job '{job_name}'...") + api_instance.patch_namespaced_job(name=job_name, namespace=namespace, body={"spec": {"suspend": True}}) + time.sleep(5) + except client.exceptions.ApiException as e: + if e.status != 404: + local_logger.error(f"Error checking for existing job: {e}") + return + + def wait_for_job_deletion(self, job_name, namespace, sleep=2, max_wait=60): + """Wait for a Kubernetes Job to be deleted before proceeding.""" + api_instance = client.BatchV1Api() + console = Console() + waited = 0 + + while waited < max_wait: + try: + api_instance.read_namespaced_job(name=job_name, namespace=namespace) + time.sleep(sleep) + waited += sleep + except client.exceptions.ApiException as e: + if e.status == 404: + console.log(f"[bold green]Job '{job_name}' successfully deleted.") + return + else: + console.log(f"[red]Error checking job deletion: {e}") + raise + + raise TimeoutError(f"[red]Timed out waiting for job '{job_name}' to be deleted.") + + +class BHotelWrkWorkloadManager(StreamWorkloadManager): + """ + Wrk2 workload generator for Kubernetes. 
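+    (Despite the summary above, this manager drives the Blueprint HotelReservation
+    wlgen Job via BHotelWrk; it only mirrors the Wrk2WorkloadManager interface.)
+
+    Illustrative usage sketch (hypothetical parameter values):
+        wrk = BHotelWrk(tput=100, duration="60s", multiplier=1)
+        manager = BHotelWrkWorkloadManager(wrk, namespace="hotel-reservation")
+        manager.start()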
+ """ + + def __init__(self, wrk: BHotelWrk, namespace:str = 'default', job_name:str="bhotelwrk-wlgen-job", CPU_containment: bool = False): + super().__init__() + self.wrk = wrk + self.job_name = job_name + self.namespace = namespace + self.CPU_containment = CPU_containment + config.load_kube_config() + self.core_v1_api = client.CoreV1Api() + self.batch_v1_api = client.BatchV1Api() + + self.log_pool = [] + + # different from self.last_log_time, which is the timestamp of the whole entry + self.last_log_line_time = None + + def create_task(self): + namespace = self.namespace + configmap_name = "bhotelwrk-wlgen-env" + + self.wrk.create_configmap( + config_name=configmap_name, + namespace=namespace, + ) + + self.wrk.create_bhotelwrk_job( + job_name=self.job_name, + namespace=namespace, + ) + + def _parse_log(self, logs: list[str]) -> WorkloadEntry: + # ----------------------------------------------------------------------- + # 10 requests in 10.00s, 2.62KB read + # Non-2xx or 3xx responses: 10 + + number = -1 + ok = True + + try: + start_time = logs[1].split(": ")[1] + start_time = datetime.strptime(start_time, "%Y-%m-%dT%H:%M:%S.%fZ").timestamp() + number = int(logs[2].split(": ")[1]) + except Exception as e: + local_logger.error(f"Error parsing log: {e}") + number = 0 + start_time = -1 + + return WorkloadEntry( + time=start_time, + number=number, + log="\n".join([log for log in logs[7:]]), + ok=ok, + ) + + def retrievelog(self) -> list[WorkloadEntry]: + namespace = self.namespace + grouped_logs = [] + pods = self.core_v1_api.list_namespaced_pod(namespace, label_selector=f"job-name={self.job_name}") + if len(pods.items) == 0: + raise Exception(f"No pods found for job {self.job_name} in namespace {namespace}") + + try: + logs = self.core_v1_api.read_namespaced_pod_log(pods.items[0].metadata.name, namespace) + logs = logs.split("\n") + except Exception as e: + local_logger.error(f"Error retrieving logs from {self.job_name} : {e}") + return [] + + extracted_logs = self._extract_target_logs(logs, startlog="Finished all requests", endlog="End of latency distribution") + grouped_logs.append(self._parse_log(extracted_logs)) + return grouped_logs + + def _extract_target_logs(self, logs: list[str], startlog: str, endlog: str) -> list[str]: + start_index = None + end_index = None + + for i, log_line in enumerate(logs): + if startlog in log_line: + start_index = i + elif endlog in log_line and start_index is not None: + end_index = i + break + + if start_index is not None and end_index is not None: + return logs[start_index:end_index] + + return [] + + def _schedule_cpu_containment(self): + """ + Schedule CPU containment injection and recovery based on workload start time. + """ + if not self.CPU_containment: + return + + # Initialize fault injector + self.cpu_containment_injector = ChaosInjector(self.namespace) + + # Schedule CPU stress injection after 60 seconds + self.cpu_stress_timer = threading.Timer(60.0, self._inject_cpu_stress) + self.cpu_stress_timer.start() + local_logger.info("CPU stress injection scheduled for 60 seconds after workload start") + + # Schedule CPU stress recovery after 90 seconds + self.cpu_recovery_timer = threading.Timer(90.0, self._recover_cpu_stress) + self.cpu_recovery_timer.start() + local_logger.info("CPU stress recovery scheduled for 90 seconds after workload start") + + def _inject_cpu_stress(self): + """ + Inject CPU stress using the symptom fault injector. 
+ """ + try: + local_logger.info("Injecting CPU stress...") + # You may need to adjust deployment_name and microservice based on your setup + experiment_name = f"cpu-stress-all-pods" + chaos_experiment = { + "apiVersion": "chaos-mesh.org/v1alpha1", + "kind": "StressChaos", + "metadata": { + "name": experiment_name, + "namespace": 'chaos-mesh', + }, + "spec": { + "mode": "all", + "selector": { + "namespaces": [self.namespace], + }, + "stressors": { + "cpu": { + "workers": 30, + "load": 90, + } + }, + }, + } + self.cpu_containment_injector.create_chaos_experiment(chaos_experiment, experiment_name) + start_time = datetime.now().strftime("%Y/%m/%d %H:%M:%S") + local_logger.info(f"[{start_time}] Injecting CPU stress...") + self.current_experiment_name = experiment_name # Save the current experiment name + local_logger.info("CPU stress injection completed") + except Exception as e: + local_logger.error(f"Error injecting CPU stress: {e}") + + def _recover_cpu_stress(self): + """ + Recover from CPU stress by deleting the ChaosMesh experiment. + """ + try: + local_logger.info("Recovering from CPU stress...") + + if hasattr(self, 'current_experiment_name'): + self.cpu_containment_injector.delete_chaos_experiment(self.current_experiment_name) + local_logger.info("CPU stress recovery completed for all pods") + else: + local_logger.error("No active CPU stress experiment found") + + except Exception as e: + local_logger.error(f"Error recovering from CPU stress: {e}") + + def start(self): + local_logger.info("Start Workload with Blueprint Hotel Worklnload Manager") + self.create_task() + self._schedule_cpu_containment() + + def stop(self): + local_logger.info("Stop Workload with Blueprint Hotel Workload Manager") + self.wrk.stop_workload(job_name=self.job_name, namespace=self.namespace) diff --git a/sregym/generators/workload/locust-fetcher-template.yaml b/sregym/generators/workload/locust-fetcher-template.yaml new file mode 100644 index 0000000..fd0ca99 --- /dev/null +++ b/sregym/generators/workload/locust-fetcher-template.yaml @@ -0,0 +1,28 @@ +apiVersion: v1 +kind: Pod +metadata: + name: locust-fetcher + labels: + app: locust-fetcher + job: workload +spec: + containers: + - name: fetcher + image: python:3.12-slim + command: ["bash", "-c"] + args: # Another choice: use requests + - | + apt-get update -qq && apt-get install -y -qq curl && rm -rf /var/lib/apt/lists/* + round=0 + while true; do + echo "Running Locust on round #${round}" + round=$((round + 1)) + curl -s $LOCUST_URL/stats/requests | python -m json.tool --compact + curl -s $LOCUST_URL/stats/reset > /dev/null + sleep $INTERVAL_SECONDS + done + env: + - name: LOCUST_URL + value: "http://load-generator:8089" + - name: INTERVAL_SECONDS + value: "10" \ No newline at end of file diff --git a/sregym/generators/workload/locust.py b/sregym/generators/workload/locust.py new file mode 100644 index 0000000..109f6ce --- /dev/null +++ b/sregym/generators/workload/locust.py @@ -0,0 +1,204 @@ +import json +import math +import time +from datetime import datetime + +import yaml +from kubernetes import client, config, stream + +from sregym.generators.workload.base import WorkloadEntry +from sregym.generators.workload.stream import STREAM_WORKLOAD_EPS, StreamWorkloadManager +from sregym.paths import BASE_DIR +from sregym.service.kubectl import KubeCtl + + +import logging +local_logger = logging.getLogger("all.infra.workload") +local_logger.propagate = True +local_logger.setLevel(logging.DEBUG) + +class LocustWorkloadManager(StreamWorkloadManager): + def 
__init__(self, namespace: str, locust_url: str): + super().__init__() + + self.namespace = namespace + self.locust_url = locust_url + + self.log_pool = [] + self.last_log_line_time = None + + config.load_kube_config() + self.core_v1_api = client.CoreV1Api() + + self.kubectl = KubeCtl() + + def remove_fetcher(self): + try: + pods = self.core_v1_api.list_namespaced_pod(namespace=self.namespace, label_selector="app=locust-fetcher") + if pods.items: + print("Found locust-fetcher pod, removing it...") + self.core_v1_api.delete_namespaced_pod( + name=pods.items[0].metadata.name, + namespace=self.namespace, + body=client.V1DeleteOptions(grace_period_seconds=0, propagation_policy="Background"), + ) + while True: + time.sleep(5) + pods = self.core_v1_api.list_namespaced_pod( + namespace=self.namespace, label_selector="app=locust-fetcher" + ) + if not pods.items: + break + except client.exceptions.ApiException as e: + if e.status != 404: + print(f"Error removing pod: {e}") + return + + def create_fetcher(self): + self.remove_fetcher() + + wrk_job_yaml = BASE_DIR / "generators" / "workload" / "locust-fetcher-template.yaml" + with open(wrk_job_yaml, "r") as f: + job_template = yaml.safe_load(f) + envs = job_template["spec"]["containers"][0]["env"] + for i, env in enumerate(envs): + if env["name"] == "LOCUST_URL": + envs[i]["value"] = f"http://{self.locust_url}" + break + + try: + response = self.core_v1_api.create_namespaced_pod( + namespace=self.namespace, + body=job_template, + ) + print("Waiting for locust-fetcher pod to be created...") + while True: + pod = self.core_v1_api.read_namespaced_pod_status( + name="locust-fetcher", + namespace=self.namespace, + ) + conditions = pod.status.conditions or [] + ready = any(cond.type == "Ready" and cond.status == "True" for cond in conditions) + if ready: + break + time.sleep(5) + print(f"Pod locust-fetcher created.") + except client.exceptions.ApiException as e: + print(f"Error creating pod: {e}") + return + + def _parse_log(self, log_lines: list[dict]) -> WorkloadEntry: + if "Running Locust on round #" not in log_lines[0]["content"]: + raise ValueError("Log does not contain expected start pattern.") + if len(log_lines) != 2: + raise ValueError("Log does not contain exactly two lines for parsing.") + + parsed_log = json.loads(log_lines[1]["content"]) + + start_time = log_lines[1]["time"][0:26] + "Z" + start_time = datetime.strptime(start_time, "%Y-%m-%dT%H:%M:%S.%fZ").timestamp() + + has_error = False + + for items in parsed_log.get("errors", []): + if items.get("occurrences", 0) > 0: + has_error = True + break + + num_requests = 0 + + for items in parsed_log.get("stats", []): + if items.get("safe_name", "") == "Aggregated": + num_requests = items.get("num_requests", 0) + break + + return WorkloadEntry( + time=start_time, + number=num_requests, + log=log_lines[1]["content"], + ok=not has_error, + ) + + def retrievelog(self, start_time: float | None = None) -> list[WorkloadEntry]: + pods = self.core_v1_api.list_namespaced_pod(self.namespace, label_selector=f"app=locust-fetcher") + + if len(pods.items) == 0: + raise Exception(f"No load-generator found in namespace {self.namespace}") + + kwargs = { + "timestamps": True, + } + if start_time is not None: + resp = stream.stream( + self.core_v1_api.connect_get_namespaced_pod_exec, + name=pods.items[0].metadata.name, + namespace=self.namespace, + command=["date", "-Ins"], + stderr=True, + stdin=False, + stdout=True, + tty=False, + ) + + shorter = resp.strip()[:26] + pod_current_time = datetime.strptime(shorter, 
"%Y-%m-%dT%H:%M:%S,%f").timestamp() + # Use the difference between pod's current time and requested start_time + kwargs["since_seconds"] = math.ceil(pod_current_time - start_time) + STREAM_WORKLOAD_EPS + + try: + logs = self.core_v1_api.read_namespaced_pod_log(pods.items[0].metadata.name, self.namespace, **kwargs) + logs = logs.split("\n") + except Exception as e: + print(f"Error retrieving logs from {self.job_name} : {e}") + return [] + + for log in logs: + timestamp = log[0:30] + content = log[31:] + + # last_log_line_time: in string format, e.g. "2025-01-01T12:34:56.789012345Z" + if self.last_log_line_time is not None and timestamp <= self.last_log_line_time: + continue + + self.last_log_line_time = timestamp + self.log_pool.append(dict(time=timestamp, content=content)) + + grouped_logs = [] + + last_end = 0 + for i, log in enumerate(self.log_pool): + if "Running Locust on round #" in log["content"]: + try: + grouped_logs.append(self._parse_log(self.log_pool[last_end:i])) + except Exception as e: + # Skip initialization logs and json parsing errors + pass + last_end = i + + self.log_pool = self.log_pool[last_end:] + + return grouped_logs + + def start(self): + local_logger.info("Start Workload with Locust") + local_logger.info("AstronomyShop has a built-in load generator.") + local_logger.info("Creating locust-fetcher pod...") + self.create_fetcher() + local_logger.debug("Workload started") + + def stop(self): + local_logger.info("Stop Workload with Locust") + local_logger.info("AstronomyShop's built-in load generator is automatically managed.") + local_logger.info("Removing locust-fetcher pod if it exists...") + self.remove_fetcher() + local_logger.debug("Workload stopped") + + + def change_users(self, number: int, namespace: str): + increase_user_cmd = f"kubectl set env deployment/load-generator LOCUST_USERS={number} -n {namespace}" + self.kubectl.exec_command(increase_user_cmd) + + def change_spawn_rate(self, rate: int, namespace: str): + increase_spawn_rate_cmd = f"kubectl set env deployment/load-generator LOCUST_SPAWN_RATE={rate} -n {namespace}" + self.kubectl.exec_command(increase_spawn_rate_cmd) + \ No newline at end of file diff --git a/sregym/generators/workload/stream.py b/sregym/generators/workload/stream.py new file mode 100644 index 0000000..4c7ea57 --- /dev/null +++ b/sregym/generators/workload/stream.py @@ -0,0 +1,105 @@ +import time +from abc import abstractmethod +from bisect import bisect_left + +from sregym.generators.workload.base import WorkloadEntry, WorkloadManager + +STREAM_WORKLOAD_TIMEOUT = 60 * 1.5 # 1.5 minutes +STREAM_WORKLOAD_EPS = 10 # 5 seconds + + +class StreamWorkloadManager(WorkloadManager): + """ + Stream-like workload manager + """ + + log_history: list[WorkloadEntry] = [] + last_log_time: float | None = None # The timestamp inside the pod + + def __init__(self): + super().__init__() + + self.last_log_time = None + + @abstractmethod + def retrievelog(self, start_time: float | None = None) -> list[WorkloadEntry]: + """ + Retrieve new logs. Like a stream, it should return only new logs since the last retrieval. + """ + + raise NotImplementedError("Subclasses must implement this method.") + + def _extractlog(self): + """ + Stream-like log extraction. 
+ """ + while True: + # In case of byte limits + new_logs = self.retrievelog(self.last_log_time) + + if not new_logs: + return + + if not sorted(new_logs, key=lambda x: x.time): + raise ValueError("Logs are not sorted by time.") + + first_greater = 0 + if self.last_log_time is not None: + while first_greater < len(new_logs) and new_logs[first_greater].time <= self.last_log_time: + first_greater += 1 + + if first_greater < len(new_logs): + self.log_history.extend(new_logs[first_greater:]) + self.last_log_time = new_logs[-1].time + + def collect(self, number=100, since_seconds=None) -> list[WorkloadEntry]: + """ + Run the workload generator until collected data is sufficient. + """ + if since_seconds is not None: + if not isinstance(since_seconds, (int, float)): + raise TypeError("since_seconds must be a int or float") + if since_seconds > self.last_log_time: + since_seconds = self.last_log_time + + # I put it here becuase the first run of it may be very late + self._extractlog() + + collect_start_time = time.time() + + if since_seconds is None or self.last_log_time is None: + start_entry = len(self.log_history) + else: + start_entry = bisect_left( + self.log_history, + self.last_log_time - since_seconds, + key=lambda x: x.time if isinstance(x, WorkloadEntry) else x, + ) + + end_entry = start_entry + + accumulated_logs = 0 + + while time.time() - collect_start_time < STREAM_WORKLOAD_TIMEOUT: + while end_entry < len(self.log_history): + accumulated_logs += self.log_history[end_entry].number + end_entry += 1 + if accumulated_logs >= number: + return self.log_history[start_entry:end_entry] + time.sleep(5) + self._extractlog() + + raise TimeoutError("Workload generator did not collect enough data within the timeout period.") + + def recent_entries(self, duration=30) -> list[WorkloadEntry]: + """ + Return recently collected data within the given duration (seconds). + """ + self._extractlog() + start_time = self.last_log_time - duration + start_entry = bisect_left( + self.log_history, + start_time, + key=lambda x: x.time if isinstance(x, WorkloadEntry) else x, + ) + return self.log_history[start_entry:] diff --git a/sregym/generators/workload/trainticket_locust.py b/sregym/generators/workload/trainticket_locust.py new file mode 100644 index 0000000..a92f4b5 --- /dev/null +++ b/sregym/generators/workload/trainticket_locust.py @@ -0,0 +1,152 @@ +"""TrainTicket Locust Workload Manager + +Extends the base LocustWorkloadManager to provide TrainTicket-specific +workload generation capabilities. +""" + +import logging +from typing import Optional, Dict, Any + +from sregym.generators.workload.locust import LocustWorkloadManager +from sregym.service.kubectl import KubeCtl + +logger = logging.getLogger(__name__) + + +class TrainTicketLocustWorkloadManager(LocustWorkloadManager): + """TrainTicket-specific Locust workload manager. + + Manages Locust load generation for TrainTicket application, + including specific scenarios for fault injection testing. + """ + + def __init__(self, namespace: str = "train-ticket", kubectl: Optional[KubeCtl] = None): + """Initialize TrainTicket Locust workload manager. 
+ + Args: + namespace: Kubernetes namespace + kubectl: Optional kubectl instance + """ + super().__init__(namespace=namespace, kubectl=kubectl) + self.locust_master_host = "locust-master" + self.locust_web_port = 8089 + + def start(self): + """Start TrainTicket workload generation.""" + try: + # First check if Locust is deployed + if not self._is_locust_ready(): + logger.error("Locust deployment not ready") + return + + # Start the fetcher pod + super().start() + + print("[TrainTicket Locust] Workload manager started") + print(f"[TrainTicket Locust] Access UI at http://:30089 (admin/admin)") + + except Exception as e: + logger.error(f"Error starting TrainTicket workload: {e}") + + def trigger_f1_scenario(self, user_count: int = 10, spawn_rate: int = 2): + """Trigger F1 fault scenario with order creation and cancellation. + + Args: + user_count: Number of simulated users + spawn_rate: Users spawned per second + """ + try: + print(f"[TrainTicket Locust] Triggering F1 scenario with {user_count} users") + + # Start the swarm with specific parameters + result = self.kubectl.exec_command( + f"kubectl exec deployment/locust-master -n {self.namespace} -- " + f"curl -X POST http://localhost:{self.locust_web_port}/swarm " + f"-d 'user_count={user_count}&spawn_rate={spawn_rate}'" + ) + + if result: + print("[TrainTicket Locust] F1 scenario started - users will create and cancel orders") + print("[TrainTicket Locust] This will trigger the 8-second delay fault if enabled") + else: + print("[TrainTicket Locust] Failed to start F1 scenario") + + except Exception as e: + logger.error(f"Error triggering F1 scenario: {e}") + + def stop_workload(self): + """Stop the current workload.""" + try: + result = self.kubectl.exec_command( + f"kubectl exec deployment/locust-master -n {self.namespace} -- " + f"curl -X GET http://localhost:{self.locust_web_port}/stop" + ) + + if result: + print("[TrainTicket Locust] Workload stopped") + else: + print("[TrainTicket Locust] Failed to stop workload") + + except Exception as e: + logger.error(f"Error stopping workload: {e}") + + def get_stats(self) -> Dict[str, Any]: + """Get current Locust statistics. + + Returns: + Dict containing current workload statistics + """ + try: + result = self.kubectl.exec_command( + f"kubectl exec deployment/locust-master -n {self.namespace} -- " + f"curl -s http://localhost:{self.locust_web_port}/stats/requests" + ) + + if result: + import json + return json.loads(result) + else: + return {} + + except Exception as e: + logger.error(f"Error getting stats: {e}") + return {} + + def _is_locust_ready(self) -> bool: + """Check if Locust deployment is ready. + + Returns: + bool: True if Locust is deployed and ready + """ + try: + # Check if Locust master is running + result = self.kubectl.exec_command( + f"kubectl get deployment locust-master -n {self.namespace} " + f"-o jsonpath='{{.status.readyReplicas}}'" + ) + + return result == "1" + + except Exception as e: + logger.error(f"Error checking Locust readiness: {e}") + return False + + def set_target_host(self, host: str): + """Update the target host for load generation. 
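+
+        The new host is POSTed to the Locust master's /swarm endpoint, the same
+        endpoint used by trigger_f1_scenario to start a swarm.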
+ + Args: + host: New target host URL + """ + try: + # Update the Locust target + result = self.kubectl.exec_command( + f"kubectl exec deployment/locust-master -n {self.namespace} -- " + f"curl -X POST http://localhost:{self.locust_web_port}/swarm " + f"-d 'host={host}'" + ) + + if result: + print(f"[TrainTicket Locust] Target host updated to: {host}") + + except Exception as e: + logger.error(f"Error updating target host: {e}") diff --git a/sregym/generators/workload/wrk-job-template.yaml b/sregym/generators/workload/wrk-job-template.yaml new file mode 100644 index 0000000..3beee3d --- /dev/null +++ b/sregym/generators/workload/wrk-job-template.yaml @@ -0,0 +1,22 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: wrk2-job + labels: + job: workload +spec: + template: + spec: + restartPolicy: Never + containers: + - name: wrk2 + image: deathstarbench/wrk2-client:latest + args: [] + volumeMounts: + - name: wrk2-scripts + mountPath: /scripts + readOnly: true + volumes: + - name: wrk2-scripts + configMap: + name: wrk2-payload-script \ No newline at end of file diff --git a/sregym/generators/workload/wrk2.py b/sregym/generators/workload/wrk2.py new file mode 100644 index 0000000..49fb9ef --- /dev/null +++ b/sregym/generators/workload/wrk2.py @@ -0,0 +1,323 @@ +import math +import textwrap +import time +from datetime import datetime +from pathlib import Path + +import yaml +from kubernetes import client, config, stream +from rich.console import Console + +from sregym.generators.workload.base import WorkloadEntry +from sregym.generators.workload.stream import STREAM_WORKLOAD_EPS, StreamWorkloadManager +from sregym.paths import BASE_DIR + + +import logging +local_logger = logging.getLogger("all.infra.workload") +local_logger.propagate = True +local_logger.setLevel(logging.DEBUG) + +class Wrk2: + """ + Persistent workload generator + """ + + def __init__(self, rate, dist="norm", connections=2, duration=6, threads=2, latency=True, namespace="default"): + self.rate = rate + self.dist = dist + self.connections = connections + self.duration = duration + self.threads = threads + self.latency = latency + self.namespace = namespace + + config.load_kube_config() + + def create_configmap(self, name, namespace, payload_script_path, url): + with open(payload_script_path, "r") as script_file: + script_content = script_file.read() + + workload_script = f""" + #!/bin/bash + round=0 + while true; do + echo "Running wrk2 on round #${{round}}" + round=$((round + 1)) + + wrk -D {self.dist} \\ + -t {str(self.threads)} \\ + -c {str(self.connections)} \\ + -d {self.duration}s \\ + -s /scripts/{payload_script_path.name} \\ + {url} \\ + -R {str(self.rate)} \\ + -L {"--latency" if self.latency else ""} + sleep 1 + done + """ + + workload_script = textwrap.dedent(workload_script).strip() + + configmap_body = client.V1ConfigMap( + metadata=client.V1ObjectMeta(name=name), + data={ + payload_script_path.name: script_content, + "wrk2-workload.sh": workload_script, + }, + ) + + api_instance = client.CoreV1Api() + try: + local_logger.info(f"Checking for existing ConfigMap '{name}'...") + api_instance.delete_namespaced_config_map(name=name, namespace=self.namespace) + local_logger.info(f"ConfigMap '{name}' deleted.") + except client.exceptions.ApiException as e: + if e.status != 404: + local_logger.error(f"Error deleting ConfigMap '{name}': {e}") + return + + try: + local_logger.info(f"Creating ConfigMap '{name}'...") + api_instance.create_namespaced_config_map(namespace=self.namespace, body=configmap_body) + 
local_logger.info(f"ConfigMap '{name}' created successfully.") + except client.exceptions.ApiException as e: + local_logger.error(f"Error creating ConfigMap '{name}': {e}") + + def create_wrk_job(self, job_name, namespace, payload_script): + wrk_job_yaml = BASE_DIR / "generators" / "workload" / "wrk-job-template.yaml" + with open(wrk_job_yaml, "r") as f: + job_template = yaml.safe_load(f) + + job_template["metadata"]["name"] = job_name + container = job_template["spec"]["template"]["spec"]["containers"][0] + container["args"] = ["/bin/bash", "/scripts/wrk2-workload.sh"] + + job_template["spec"]["template"]["spec"]["volumes"] = [ + { + "name": "wrk2-scripts", + "configMap": {"name": "wrk2-payload-script"}, + } + ] + container["volumeMounts"] = [ + { + "name": "wrk2-scripts", + "mountPath": f"/scripts/{payload_script}", + "subPath": payload_script, + }, + { + "name": "wrk2-scripts", + "mountPath": f"/scripts/wrk2-workload.sh", + "subPath": "wrk2-workload.sh", + }, + ] + + api_instance = client.BatchV1Api() + try: + existing_job = api_instance.read_namespaced_job(name=job_name, namespace=self.namespace) + if existing_job: + local_logger.info(f"Job '{job_name}' already exists. Deleting it...") + api_instance.delete_namespaced_job( + name=job_name, + namespace=self.namespace, + body=client.V1DeleteOptions(propagation_policy="Foreground"), + ) + self.wait_for_job_deletion(job_name, self.namespace) + except client.exceptions.ApiException as e: + if e.status != 404: + local_logger.error(f"Error checking for existing job: {e}") + return + + try: + response = api_instance.create_namespaced_job(namespace=self.namespace, body=job_template) + local_logger.info(f"Job created: {response.metadata.name}") + except client.exceptions.ApiException as e: + local_logger.error(f"Error creating job: {e}") + + def start_workload(self, payload_script, url): + configmap_name = "wrk2-payload-script" + + self.create_configmap(name=configmap_name, namespace=self.namespace, payload_script_path=payload_script, url=url) + + self.create_wrk_job(job_name="wrk2-job", namespace=self.namespace, payload_script=payload_script.name) + + def stop_workload(self, job_name="wrk2-job"): + api_instance = client.BatchV1Api() + try: + existing_job = api_instance.read_namespaced_job(name=job_name, namespace=self.namespace) + if existing_job: + local_logger.info(f"Stopping job '{job_name}'...") + # @daklqw: I think there might be a better way + api_instance.patch_namespaced_job(name=job_name, namespace=self.namespace, body={"spec": {"suspend": True}}) + time.sleep(5) + except client.exceptions.ApiException as e: + if e.status != 404: + local_logger.error(f"Error checking for existing job: {e}") + return + + def wait_for_job_deletion(self, job_name, namespace, sleep=2, max_wait=60): + """Wait for a Kubernetes Job to be deleted before proceeding.""" + api_instance = client.BatchV1Api() + console = Console() + waited = 0 + + while waited < max_wait: + try: + api_instance.read_namespaced_job(name=job_name, namespace=self.namespace) + time.sleep(sleep) + waited += sleep + except client.exceptions.ApiException as e: + if e.status == 404: + console.log(f"[bold green]Job '{job_name}' successfully deleted.") + return + else: + console.log(f"[red]Error checking job deletion: {e}") + raise + + raise TimeoutError(f"[red]Timed out waiting for job '{job_name}' to be deleted.") + + +class Wrk2WorkloadManager(StreamWorkloadManager): + """ + Wrk2 workload generator for Kubernetes. 
+ """ + + def __init__(self, wrk: Wrk2, payload_script: Path, url, job_name="wrk2-job", namespace="default"): + super().__init__() + self.wrk = wrk + self.payload_script = payload_script + self.url = url + self.job_name = job_name + self.namespace = namespace + + config.load_kube_config() + self.core_v1_api = client.CoreV1Api() + self.batch_v1_api = client.BatchV1Api() + + self.log_pool = [] + + # different from self.last_log_time, which is the timestamp of the whole entry + self.last_log_line_time = None + + def create_task(self): + configmap_name = "wrk2-payload-script" + + self.wrk.create_configmap( + name=configmap_name, + namespace=self.namespace, + payload_script_path=self.payload_script, + url=self.url, + ) + + self.wrk.create_wrk_job( + job_name=self.job_name, + namespace=self.namespace, + payload_script=self.payload_script.name, + ) + + def _parse_log(self, logs: list[tuple[str, str]]) -> WorkloadEntry: + # ----------------------------------------------------------------------- + # 10 requests in 10.00s, 2.62KB read + # Non-2xx or 3xx responses: 10 + + number = -1 + ok = True + + try: + start_time = logs[0]["time"][0:26] + "Z" # Convert to ISO 8601 format + start_time = datetime.strptime(start_time, "%Y-%m-%dT%H:%M:%S.%fZ").timestamp() + + for i, part in enumerate(logs): + log = part["content"] + if "-" * 35 in log and "requests in" in logs[i + 1]["content"]: + parts = logs[i + 1]["content"].split(" ") + for j, part in enumerate(parts): + if part != "": + number = parts[j] + assert j + 1 < len(parts) and parts[j + 1] == "requests" + break + if "Non-2xx or 3xx responses" in log: + ok = False + + number = int(number) + except Exception as e: + local_logger.error(f"Error parsing log: {e}") + number = 0 + start_time = -1 + + return WorkloadEntry( + time=start_time, + number=number, + log="\n".join([part["content"] for part in logs]), + ok=ok, + ) + + def retrievelog(self, start_time: float | None = None) -> list[WorkloadEntry]: + pods = self.core_v1_api.list_namespaced_pod(self.namespace, label_selector=f"job-name={self.job_name}") + if len(pods.items) == 0: + raise Exception(f"No pods found for job {self.job_name} in namespace {self.namespace}") + + kwargs = { + "timestamps": True, + } + if start_time is not None: + # Get the current time inside the pod by executing 'date +%s' in the pod + resp = stream.stream( + self.core_v1_api.connect_get_namespaced_pod_exec, + name=pods.items[0].metadata.name, + namespace=self.namespace, + command=["date", "-Ins"], + stderr=True, + stdin=False, + stdout=True, + tty=False, + ) + + # 2025-01-01T12:34:56,123456 + shorter = resp.strip()[:26] + pod_current_time = datetime.strptime(shorter, "%Y-%m-%dT%H:%M:%S,%f").timestamp() + # Use the difference between pod's current time and requested start_time + kwargs["since_seconds"] = math.ceil(pod_current_time - start_time) + STREAM_WORKLOAD_EPS + + try: + logs = self.core_v1_api.read_namespaced_pod_log(pods.items[0].metadata.name, self.namespace, **kwargs) + logs = logs.split("\n") + except Exception as e: + local_logger.error(f"Error retrieving logs from {self.job_name} : {e}") + return [] + + for log in logs: + timestamp = log[0:30] + content = log[31:] + + # last_log_line_time: in string format, e.g. 
"2025-01-01T12:34:56.789012345Z" + if self.last_log_line_time is not None and timestamp <= self.last_log_line_time: + continue + + self.last_log_line_time = timestamp + self.log_pool.append(dict(time=timestamp, content=content)) + + # End pattern is: + # - Requests/sec: + # - Transfer/sec: + + grouped_logs = [] + + last_end = 0 + for i, log in enumerate(self.log_pool): + if (i > 0 and "Requests/sec:" in self.log_pool[i - 1]["content"]) and "Transfer/sec:" in log["content"]: + result = self._parse_log(self.log_pool[last_end : i + 1]) + grouped_logs.append(result) + last_end = i + 1 + + self.log_pool = self.log_pool[last_end:] + + return grouped_logs + + def start(self): + local_logger.info("Start Workload with Wrk2") + self.create_task() + + def stop(self): + local_logger.info("Stop Workload of Wrk2") + self.wrk.stop_workload(job_name=self.job_name) diff --git a/sregym/observer/filebeat/.helmignore b/sregym/observer/filebeat/.helmignore new file mode 100644 index 0000000..516d3f8 --- /dev/null +++ b/sregym/observer/filebeat/.helmignore @@ -0,0 +1,2 @@ +tests/ +.pytest_cache/ diff --git a/sregym/observer/filebeat/Chart.yaml b/sregym/observer/filebeat/Chart.yaml new file mode 100644 index 0000000..f43e326 --- /dev/null +++ b/sregym/observer/filebeat/Chart.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +description: Official Elastic helm chart for Filebeat +home: https://github.com/elastic/helm-charts +maintainers: + - email: helm-charts@elastic.co + name: Elastic +name: filebeat +version: 8.7.1 +appVersion: 8.7.1 +sources: + - https://github.com/elastic/beats +icon: https://helm.elastic.co/icons/beats.png diff --git a/sregym/observer/filebeat/Makefile b/sregym/observer/filebeat/Makefile new file mode 100644 index 0000000..143a1d9 --- /dev/null +++ b/sregym/observer/filebeat/Makefile @@ -0,0 +1 @@ +include ../helpers/common.mk diff --git a/sregym/observer/filebeat/README.md b/sregym/observer/filebeat/README.md new file mode 100644 index 0000000..3cdfbec --- /dev/null +++ b/sregym/observer/filebeat/README.md @@ -0,0 +1,278 @@ + +```shell +helm install filebeat ./ -n observe +``` + +Change the namespace to capture the log in the `values.yaml`. + +## Filebeat Helm Chart + + +[![Artifact HUB](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/elastic)](https://artifacthub.io/packages/search?repo=elastic) + +This Helm chart is a lightweight way to configure and run our official +[Filebeat Docker image][]. + +> **Warning** +> When it comes to running the Elastic on Kubernetes infrastructure, we +> recommend [Elastic Cloud on Kubernetes][] (ECK) as the best way to run and manage +> the Elastic Stack. +> +> ECK offers many operational benefits for both our basic-tier and our +> enterprise-tier customers, such as spinning up cluster nodes that were lost on +> failed infrastructure, seamless upgrades, rolling cluster changes, and much +> much more. +> +> With the release of the Elastic Stack Helm charts for Elastic version 8.5.1, +> we are handing over the ongoing maintenance of our Elastic Stack Helm charts +> to the community and contributors. This repository will finally be archived +> after 6 months time. Elastic Stacks deployed on Kubernetes through Helm charts +> will still be fully supported under EOL limitations. +> +> Since we want to provide an even better experience for our customers by +> running the Elastic Stack on Kubernetes, we will continue maintaining the +> Helm charts applicable to ECK Custom Resources. These charts can be found in +> the [ECK repository][eck-charts]. 
+> +> Helm charts will currently be maintained for ECK Enterprise-tier customers, +> however, we encourage the community to engage with the existing Helm charts +> for the Elastic Stack and continue supporting their ongoing maintenance. +> +> See for more details. + + + + + +- [Requirements](#requirements) +- [Installing](#installing) + - [Install a released version using the Helm repository](#install-a-released-version-using-the-helm-repository) + - [Install a development version using the main branch](#install-a-development-version-using-the-main-branch) +- [Upgrading](#upgrading) +- [Usage notes](#usage-notes) +- [Configuration](#configuration) +- [FAQ](#faq) + - [How to use Filebeat with Elasticsearch with security (authentication and TLS) enabled?](#how-to-use-filebeat-with-elasticsearch-with-security-authentication-and-tls-enabled) + - [How to install OSS version of Filebeat?](#how-to-install-oss-version-of-filebeat) + - [Why is Filebeat host.name field set to Kubernetes pod name?](#why-is-filebeat-hostname-field-set-to-kubernetes-pod-name) + - [How do I get multiple beats agents working with hostNetworking enabled?](#how-do-i-get-multiple-beats-agents-working-with-hostnetworking-enabled) + - [How to change readinessProbe for outputs which don't support testing](#how-to-change-readinessprobe-for-outputs-which-dont-support-testing) +- [Contributing](#contributing) + + + + + + +## Requirements + +See [supported configurations][] for more details. + + +## Installing + +### Install a released version using the Helm repository + +* Add the Elastic Helm charts repo: +`helm repo add elastic https://helm.elastic.co` + +* Install it: `helm install filebeat elastic/filebeat` + + +### Install a development version using the main branch + +* Clone the git repo: `git clone git@github.com:elastic/helm-charts.git` + +* Install it: `helm install filebeat ./helm-charts/filebeat --set imageTag=8.5.1` + + +## Upgrading + +Please always check [CHANGELOG.md][] and [BREAKING_CHANGES.md][] before +upgrading to a new chart version. + + +## Usage notes + +* The default Filebeat configuration file for this chart is configured to use an +Elasticsearch endpoint. Without any additional changes, Filebeat will send +documents to the service URL that the Elasticsearch Helm chart sets up by +default. The Elasticsearch credentials are also retrieved from +`elasticsearch-master-credentials` Secret from Elasticsearch chart by default. +You may either set the `ELASTICSEARCH_HOSTS`, `ELASTICSEARCH_USER` and +`ELASTICSEARCH_PASSWORD` environment variables in `extraEnvs` to override this +or modify the default `filebeatConfig` to change this behavior. +* The default Filebeat configuration file is also configured to capture +container logs and enrich them with Kubernetes metadata by default. This will +capture all container logs in the cluster. +* This chart disables the [HostNetwork][] setting by default for compatibility +reasons with the majority of kubernetes providers and scenarios. Some kubernetes +providers may not allow enabling `hostNetwork` and deploying multiple Filebeat +pods on the same node isn't possible with `hostNetwork` However Filebeat does +recommend activating it. If your kubernetes provider is compatible with +`hostNetwork` and you don't need to run multiple Filebeat DaemonSets, you can +activate it by setting `hostNetworking: true` in [values.yaml][]. +* This repo includes several [examples][] of configurations that can be used +as a reference. 
They are also used in the automated testing of this chart.
+
+
+## Configuration
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `clusterRoleRules` | Configurable [cluster role rules][] that Filebeat uses to access Kubernetes resources | see [values.yaml][] |
+| `daemonset.annotations` | Configurable [annotations][] for filebeat DaemonSet | `{}` |
+| `daemonset.labels` | Configurable [labels][] applied to all filebeat DaemonSet pods | `{}` |
+| `daemonset.affinity` | Configurable [affinity][] for filebeat DaemonSet | `{}` |
+| `daemonset.enabled` | If true, enable the DaemonSet | `true` |
+| `daemonset.envFrom` | Templatable string of `envFrom` to be passed to the [environment from variables][] which will be appended to filebeat container for DaemonSet | `[]` |
+| `daemonset.extraEnvs` | Extra [environment variables][] which will be appended to filebeat container for DaemonSet | see [values.yaml][] |
+| `daemonset.extraVolumeMounts` | Templatable string of additional `volumeMounts` to be passed to the `tpl` function for DaemonSet | `[]` |
+| `daemonset.extraVolumes` | Templatable string of additional `volumes` to be passed to the `tpl` function for DaemonSet | `[]` |
+| `daemonset.hostAliases` | Configurable [hostAliases][] for filebeat DaemonSet | `[]` |
+| `daemonset.hostNetworking` | Enable filebeat DaemonSet to use `hostNetwork` | `false` |
+| `daemonset.filebeatConfig` | Allows you to add any config files in `/usr/share/filebeat` such as `filebeat.yml` for filebeat DaemonSet | see [values.yaml][] |
+| `daemonset.maxUnavailable` | The [maxUnavailable][] value for the pod disruption budget. By default this will prevent Kubernetes from having more than 1 unhealthy pod in the node group | `1` |
+| `daemonset.nodeSelector` | Configurable [nodeSelector][] for filebeat DaemonSet | `{}` |
+| `daemonset.secretMounts` | Allows you to easily mount a secret as a file inside the DaemonSet. Useful for mounting certificates and other secrets. See [values.yaml][] for an example | `[]` |
+| `daemonset.podSecurityContext` | Configurable [podSecurityContext][] for filebeat DaemonSet pod execution environment | see [values.yaml][] |
+| `daemonset.resources` | Allows you to set the [resources][] for filebeat DaemonSet | see [values.yaml][] |
+| `daemonset.tolerations` | Configurable [tolerations][] for filebeat DaemonSet | `[]` |
+| `deployment.annotations` | Configurable [annotations][] for filebeat Deployment | `{}` |
+| `deployment.labels` | Configurable [labels][] applied to all filebeat Deployment pods | `{}` |
+| `deployment.affinity` | Configurable [affinity][] for filebeat Deployment | `{}` |
+| `deployment.enabled` | If true, enable the Deployment | `false` |
+| `deployment.envFrom` | Templatable string of `envFrom` to be passed to the [environment from variables][] which will be appended to filebeat container for Deployment | `[]` |
+| `deployment.extraEnvs` | Extra [environment variables][] which will be appended to filebeat container for Deployment | see [values.yaml][] |
+| `deployment.extraVolumeMounts` | Templatable string of additional `volumeMounts` to be passed to the `tpl` function for Deployment | `[]` |
+| `deployment.extraVolumes` | Templatable string of additional `volumes` to be passed to the `tpl` function for Deployment | `[]` |
+| `deployment.hostAliases` | Configurable [hostAliases][] for filebeat Deployment | `[]` |
+| `deployment.filebeatConfig` | Allows you to add any config files in `/usr/share/filebeat` such as `filebeat.yml` for filebeat Deployment | see [values.yaml][] |
+| `deployment.nodeSelector` | Configurable [nodeSelector][] for filebeat Deployment | `{}` |
+| `deployment.secretMounts` | Allows you to easily mount a secret as a file inside the Deployment. Useful for mounting certificates and other secrets. See [values.yaml][] for an example | `[]` |
+| `deployment.resources` | Allows you to set the [resources][] for filebeat Deployment | see [values.yaml][] |
+| `deployment.securityContext` | Configurable [securityContext][] for filebeat Deployment pod execution environment | see [values.yaml][] |
+| `deployment.tolerations` | Configurable [tolerations][] for filebeat Deployment | `[]` |
+| `replicas` | The replica count for the Filebeat Deployment | `1` |
+| `extraContainers` | Templatable string of additional containers to be passed to the `tpl` function | `""` |
+| `extraInitContainers` | Templatable string of additional `initContainers` to be passed to the `tpl` function | `""` |
+| `fullnameOverride` | Overrides the full name of the resources. If not set the name will default to " `.Release.Name` - `.Values.nameOverride or .Chart.Name` " | `""` |
+| `hostPathRoot` | Fully-qualified [hostPath][] that will be used to persist filebeat registry data | `/var/lib` |
+| `imagePullPolicy` | The Kubernetes [imagePullPolicy][] value | `IfNotPresent` |
+| `imagePullSecrets` | Configuration for [imagePullSecrets][] so that you can use a private registry for your image | `[]` |
+| `imageTag` | The filebeat Docker image tag | `8.7.1` |
+| `image` | The filebeat Docker image | `docker.elastic.co/beats/filebeat` |
+| `livenessProbe` | Parameters to pass to liveness [probe][] checks for values such as timeouts and thresholds | see [values.yaml][] |
+| `managedServiceAccount` | Whether the `serviceAccount` should be managed by this helm chart. Set this to `false` in order to manage your own service account and related roles | `true` |
+| `nameOverride` | Overrides the chart name for resources. If not set the name will default to `.Chart.Name` | `""` |
+| `podAnnotations` | Configurable [annotations][] applied to all filebeat pods | `{}` |
+| `priorityClassName` | The name of the [PriorityClass][]. No default is supplied as the PriorityClass must be created first | `""` |
+| `readinessProbe` | Parameters to pass to readiness [probe][] checks for values such as timeouts and thresholds | see [values.yaml][] |
+| `serviceAccount` | Custom [serviceAccount][] that filebeat will use during execution. By default will use the service account created by this chart | `""` |
+| `serviceAccountAnnotations` | Annotations to be added to the ServiceAccount that is created by this chart | `{}` |
+| `terminationGracePeriod` | Termination period (in seconds) to wait before killing filebeat pod process on pod shutdown | `30` |
+| `updateStrategy` | The [updateStrategy][] for the DaemonSet. By default Kubernetes will kill and recreate pods on updates. Setting this to `OnDelete` will require that pods be deleted manually | `RollingUpdate` |
+
+
+## FAQ
+
+### How to use Filebeat with Elasticsearch with security (authentication and TLS) enabled?
+
+This Helm chart can use existing [Kubernetes secrets][] to set up credentials or certificates, for example. These secrets should be created outside of this chart and accessed using [environment variables][] and volumes.
+
+An example can be found in [examples/security][].
+
+### How to install OSS version of Filebeat?
+
+Deploying the OSS version of Filebeat can be done by setting the `image` value to the [Filebeat OSS Docker image][].
+
+An example of a Filebeat deployment using the OSS version can be found in [examples/oss][].
+
+### Why is Filebeat host.name field set to Kubernetes pod name?
+
+The default Filebeat configuration uses the Filebeat pod name for the `agent.hostname` and `host.name` fields. The hostname of the Kubernetes nodes can be found in the `kubernetes.node.name` field. If you would like `agent.hostname` and `host.name` to be set to the hostname of the nodes, you'll need to set the `hostNetworking` value to `true`.
+
+Note that enabling [hostNetwork][] makes the Filebeat pod use the host network namespace, which gives it access to the host loopback device and to services listening on localhost, and could allow it to snoop on the network activity of other pods on the same node.
+
+### How do I get multiple beats agents working with hostNetworking enabled?
+
+Multiple Beats agents may default to the same HTTP port; for example, Filebeat and Metricbeat both default to 5066. When `hostNetworking` is enabled this causes a collision when standing up the HTTP server. The workaround is to set `http.port` in the config file of one of the Beats agents so that it uses a different port.
+
+### How to change readinessProbe for outputs which don't support testing
+
+Some [Filebeat outputs][] like [Kafka output][] don't support testing using the `filebeat test output` command, which is used by the Filebeat chart readiness probe.
+
+This makes Filebeat pods crash before being ready with the following message: `Readiness probe failed: kafka output doesn't support testing`.
+
+The workaround when using this kind of output is to override the readiness probe command to check the Filebeat API instead (the same as the existing liveness probe).
+ +``` +readinessProbe: + exec: + command: + - sh + - -c + - | + #!/usr/bin/env bash -e + curl --fail 127.0.0.1:5066 +``` + + +## Contributing + +Please check [CONTRIBUTING.md][] before any contribution or for any questions +about our development and testing process. + +[affinity]: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity +[annotations]: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/ +[BREAKING_CHANGES.md]: https://github.com/elastic/helm-charts/blob/main/BREAKING_CHANGES.md +[CHANGELOG.md]: https://github.com/elastic/helm-charts/blob/main/CHANGELOG.md +[cluster role rules]: https://kubernetes.io/docs/reference/access-authn-authz/rbac/#role-and-clusterrole +[CONTRIBUTING.md]: https://github.com/elastic/helm-charts/blob/main/CONTRIBUTING.md +[eck-charts]: https://github.com/elastic/cloud-on-k8s/tree/master/deploy +[elastic cloud on kubernetes]: https://github.com/elastic/cloud-on-k8s +[environment from variables]: https://kubernetes.io/docs/tasks/configure-pod-container/configure-pod-configmap/#configure-all-key-value-pairs-in-a-configmap-as-container-environment-variables +[environment variables]: https://kubernetes.io/docs/tasks/inject-data-application/define-environment-variable-container/#using-environment-variables-inside-of-your-config +[examples]: https://github.com/elastic/helm-charts/tree/main/filebeat/examples +[examples/oss]: https://github.com/elastic/helm-charts/tree/main/filebeat/examples/oss +[examples/security]: https://github.com/elastic/helm-charts/tree/main/filebeat/examples/security +[filebeat docker image]: https://www.elastic.co/guide/en/beats/filebeat/current/running-on-docker.html +[filebeat oss docker image]: https://www.docker.elastic.co/r/beats/filebeat-oss +[filebeat outputs]: https://www.elastic.co/guide/en/beats/filebeat/current/configuring-output.html +[hostAliases]: https://kubernetes.io/docs/concepts/services-networking/add-entries-to-pod-etc-hosts-with-host-aliases/ +[hostNetwork]: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#host-namespaces +[hostPath]: https://kubernetes.io/docs/concepts/storage/volumes/#hostpath +[imagePullPolicy]: https://kubernetes.io/docs/concepts/containers/images/#updating-images +[imagePullSecrets]: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/#create-a-pod-that-uses-your-secret +[kafka output]: https://www.elastic.co/guide/en/beats/filebeat/current/kafka-output.html +[kubernetes secrets]: https://kubernetes.io/docs/concepts/configuration/secret/ +[labels]: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ +[maxUnavailable]: https://kubernetes.io/docs/tasks/run-application/configure-pdb/#specifying-a-poddisruptionbudget +[nodeSelector]: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector +[podSecurityContext]: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/ +[priorityClass]: https://kubernetes.io/docs/concepts/configuration/pod-priority-preemption/#priorityclass +[probe]: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/ +[resources]: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/ +[serviceAccount]: https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/ +[supported configurations]: https://github.com/elastic/helm-charts/tree/main/README.md#supported-configurations +[tolerations]: 
https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/ +[updateStrategy]: https://kubernetes.io/docs/tasks/manage-daemon/update-daemon-set/#daemonset-update-strategy +[values.yaml]: https://github.com/elastic/helm-charts/tree/main/filebeat/values.yaml diff --git a/sregym/observer/filebeat/examples/default/Makefile b/sregym/observer/filebeat/examples/default/Makefile new file mode 100644 index 0000000..6e4a174 --- /dev/null +++ b/sregym/observer/filebeat/examples/default/Makefile @@ -0,0 +1,13 @@ +default: test + +include ../../../helpers/examples.mk + +RELEASE := helm-filebeat-default + +install: + helm upgrade --wait --timeout=$(TIMEOUT) --install $(RELEASE) ../../ + +test: install goss + +purge: + helm del $(RELEASE) diff --git a/sregym/observer/filebeat/examples/default/README.md b/sregym/observer/filebeat/examples/default/README.md new file mode 100644 index 0000000..0b2c0b1 --- /dev/null +++ b/sregym/observer/filebeat/examples/default/README.md @@ -0,0 +1,27 @@ +# Default + +This example deploy Filebeat 8.5.1 using [default values][]. + + +## Usage + +* Deploy [Elasticsearch Helm chart][]. + +* Deploy Filebeat chart with the default values: `make install` + +* You can now setup a port forward to query Filebeat indices: + + ``` + kubectl port-forward svc/elasticsearch-master 9200 + curl localhost:9200/_cat/indices + ``` + + +## Testing + +You can also run [goss integration tests][] using `make test` + + +[elasticsearch helm chart]: https://github.com/elastic/helm-charts/tree/main/elasticsearch/examples/default/ +[goss integration tests]: https://github.com/elastic/helm-charts/tree/main/filebeat/examples/default/test/goss.yaml +[default values]: https://github.com/elastic/helm-charts/tree/main/filebeat/values.yaml diff --git a/sregym/observer/filebeat/examples/default/test/goss.yaml b/sregym/observer/filebeat/examples/default/test/goss.yaml new file mode 100644 index 0000000..936f992 --- /dev/null +++ b/sregym/observer/filebeat/examples/default/test/goss.yaml @@ -0,0 +1,43 @@ +port: + tcp:5066: + listening: true + ip: + - "127.0.0.1" + +mount: + /usr/share/filebeat/data: + exists: true + /run/docker.sock: + exists: true + /var/lib/docker/containers: + exists: true + opts: + - ro + /usr/share/filebeat/filebeat.yml: + exists: true + opts: + - ro + +user: + filebeat: + exists: true + uid: 1000 + gid: 1000 + +http: + https://elasticsearch-master:9200/_cat/indices: + status: 200 + timeout: 2000 + allow-insecure: true + username: "{{ .Env.ELASTICSEARCH_USERNAME }}" + password: "{{ .Env.ELASTICSEARCH_PASSWORD }}" + body: + - "filebeat-8.7.1" + +file: + /usr/share/filebeat/filebeat.yml: + exists: true + contains: + - "add_kubernetes_metadata" + - "output.elasticsearch" + - "elasticsearch-master:9200" diff --git a/sregym/observer/filebeat/examples/deployment/Makefile b/sregym/observer/filebeat/examples/deployment/Makefile new file mode 100644 index 0000000..55a691d --- /dev/null +++ b/sregym/observer/filebeat/examples/deployment/Makefile @@ -0,0 +1,13 @@ +default: test + +include ../../../helpers/examples.mk + +RELEASE := helm-filebeat-deployment + +install: + helm upgrade --wait --timeout=$(TIMEOUT) --install --values values.yaml $(RELEASE) ../../ + +test: install goss + +purge: + helm del $(RELEASE) diff --git a/sregym/observer/filebeat/examples/deployment/README.md b/sregym/observer/filebeat/examples/deployment/README.md new file mode 100644 index 0000000..63c780a --- /dev/null +++ b/sregym/observer/filebeat/examples/deployment/README.md @@ -0,0 +1,27 @@ +# Default + +This 
example deploy Filebeat 8.5.1 using [default values][] as a Kubernetes Deployment. + + +## Usage + +* Deploy [Elasticsearch Helm chart][]. + +* Deploy Filebeat chart with the default values: `make install` + +* You can now setup a port forward to query Filebeat indices: + + ``` + kubectl port-forward svc/elasticsearch-master 9200 + curl localhost:9200/_cat/indices + ``` + + +## Testing + +You can also run [goss integration tests][] using `make test` + + +[elasticsearch helm chart]: https://github.com/elastic/helm-charts/tree/main/elasticsearch/examples/default/ +[goss integration tests]: https://github.com/elastic/helm-charts/tree/main/filebeat/examples/deployment/test/goss.yaml +[default values]: https://github.com/elastic/helm-charts/tree/main/filebeat/values.yaml diff --git a/sregym/observer/filebeat/examples/deployment/test/goss.yaml b/sregym/observer/filebeat/examples/deployment/test/goss.yaml new file mode 100644 index 0000000..44c6c81 --- /dev/null +++ b/sregym/observer/filebeat/examples/deployment/test/goss.yaml @@ -0,0 +1,9 @@ +http: + https://elasticsearch-master:9200/_cat/indices: + status: 200 + allow-insecure: true + timeout: 2000 + username: "{{ .Env.ELASTICSEARCH_USERNAME }}" + password: "{{ .Env.ELASTICSEARCH_PASSWORD }}" + body: + - "filebeat-8.7.1" diff --git a/sregym/observer/filebeat/examples/deployment/values.yaml b/sregym/observer/filebeat/examples/deployment/values.yaml new file mode 100644 index 0000000..aa0ed7e --- /dev/null +++ b/sregym/observer/filebeat/examples/deployment/values.yaml @@ -0,0 +1,9 @@ +deployment: + enabled: true + resources: + limits: + # Should avoid OOM (Error 137) when running goss tests into the pod + memory: "300Mi" + +daemonset: + enabled: false diff --git a/sregym/observer/filebeat/examples/oss/Makefile b/sregym/observer/filebeat/examples/oss/Makefile new file mode 100644 index 0000000..bf1a48b --- /dev/null +++ b/sregym/observer/filebeat/examples/oss/Makefile @@ -0,0 +1,13 @@ +default: test + +include ../../../helpers/examples.mk + +RELEASE := helm-filebeat-oss + +install: + helm upgrade --wait --timeout=$(TIMEOUT) --install --values values.yaml $(RELEASE) ../../ + +test: install goss + +purge: + helm del $(RELEASE) diff --git a/sregym/observer/filebeat/examples/oss/README.md b/sregym/observer/filebeat/examples/oss/README.md new file mode 100644 index 0000000..a6f4473 --- /dev/null +++ b/sregym/observer/filebeat/examples/oss/README.md @@ -0,0 +1,27 @@ +# OSS + +This example deploy Filebeat 8.5.1 using [Filebeat OSS][] version. + + +## Usage + +* Deploy [Elasticsearch Helm chart][]. 
+ +* Deploy Filebeat chart with the default values: `make install` + +* You can now setup a port forward to query Filebeat indices: + + ``` + kubectl port-forward svc/oss-master 9200 + curl localhost:9200/_cat/indices + ``` + + +## Testing + +You can also run [goss integration tests][] using `make test` + + +[filebeat oss]: https://www.elastic.co/downloads/beats/filebeat-oss +[elasticsearch helm chart]: https://github.com/elastic/helm-charts/tree/main/elasticsearch/examples/oss/ +[goss integration tests]: https://github.com/elastic/helm-charts/tree/main/filebeat/examples/oss/test/goss.yaml diff --git a/sregym/observer/filebeat/examples/oss/test/goss.yaml b/sregym/observer/filebeat/examples/oss/test/goss.yaml new file mode 100644 index 0000000..ee9e095 --- /dev/null +++ b/sregym/observer/filebeat/examples/oss/test/goss.yaml @@ -0,0 +1,25 @@ +port: + tcp:5066: + listening: true + ip: + - "127.0.0.1" + +mount: + /usr/share/filebeat/data: + exists: true + +user: + filebeat: + exists: true + uid: 1000 + gid: 1000 + +http: + https://elasticsearch-master:9200/_cat/indices: + allow-insecure: true + status: 200 + timeout: 2000 + username: "{{ .Env.ELASTICSEARCH_USERNAME }}" + password: "{{ .Env.ELASTICSEARCH_PASSWORD }}" + body: + - "filebeat-oss-8.7.1" diff --git a/sregym/observer/filebeat/examples/oss/values.yaml b/sregym/observer/filebeat/examples/oss/values.yaml new file mode 100644 index 0000000..066f293 --- /dev/null +++ b/sregym/observer/filebeat/examples/oss/values.yaml @@ -0,0 +1,34 @@ +image: docker.elastic.co/beats/filebeat-oss + +daemonset: + filebeatConfig: + filebeat.yml: | + filebeat.inputs: + - type: container + paths: + - /var/log/containers/*.log + processors: + - add_kubernetes_metadata: + host: ${NODE_NAME} + matchers: + - logs_path: + logs_path: "/var/log/containers/" + output.elasticsearch: + host: '${NODE_NAME}' + hosts: ["https://elasticsearch-master:9200"] + username: '${ELASTICSEARCH_USERNAME}' + password: '${ELASTICSEARCH_PASSWORD}' + index: "filebeat-oss-%{[agent.version]}-%{+yyyy.MM.dd}" + ssl.certificate_authorities: + - /usr/share/filebeat/certs/ca.crt + setup.ilm.enabled: false + setup.template.name: "filebeat" + setup.template.pattern: "filebeat-oss-*" + secretMounts: + - name: elasticsearch-master-certs + secretName: elasticsearch-master-certs + path: /usr/share/filebeat/certs + resources: + limits: + # Should avoid OOM (Error 137) when running goss tests into the pod + memory: "300Mi" diff --git a/sregym/observer/filebeat/examples/security/Makefile b/sregym/observer/filebeat/examples/security/Makefile new file mode 100644 index 0000000..674b911 --- /dev/null +++ b/sregym/observer/filebeat/examples/security/Makefile @@ -0,0 +1,13 @@ +default: test + +include ../../../helpers/examples.mk + +RELEASE := helm-filebeat-security + +install: + helm upgrade --wait --timeout=$(TIMEOUT) --install --values values.yaml $(RELEASE) ../../ + +test: install goss + +purge: + helm del $(RELEASE) diff --git a/sregym/observer/filebeat/examples/security/README.md b/sregym/observer/filebeat/examples/security/README.md new file mode 100644 index 0000000..a135afa --- /dev/null +++ b/sregym/observer/filebeat/examples/security/README.md @@ -0,0 +1,28 @@ +# Security + +This example deploy Filebeat 8.5.1 using authentication and TLS to connect to +Elasticsearch (see [values][]). + + +## Usage + +* Deploy [Elasticsearch Helm chart][]. 
+ +* Deploy Filebeat chart with security: `make install` + +* You can now setup a port forward to query Filebeat indices: + + ``` + kubectl port-forward svc/security-master 9200 + curl -u elastic:changeme https://localhost:9200/_cat/indices + ``` + + +## Testing + +You can also run [goss integration tests][] using `make test` + + +[elasticsearch helm chart]: https://github.com/elastic/helm-charts/tree/main/elasticsearch/examples/security/ +[goss integration tests]: https://github.com/elastic/helm-charts/tree/main/filebeat/examples/security/test/goss.yaml +[values]: https://github.com/elastic/helm-charts/tree/main/filebeat/examples/security/values.yaml diff --git a/sregym/observer/filebeat/examples/security/test/goss.yaml b/sregym/observer/filebeat/examples/security/test/goss.yaml new file mode 100644 index 0000000..fab5696 --- /dev/null +++ b/sregym/observer/filebeat/examples/security/test/goss.yaml @@ -0,0 +1,9 @@ +http: + https://security-master:9200/_cat/indices: + status: 200 + timeout: 2000 + body: + - "filebeat-8.7.1" + allow-insecure: true + username: "{{ .Env.ELASTICSEARCH_USERNAME }}" + password: "{{ .Env.ELASTICSEARCH_PASSWORD }}" diff --git a/sregym/observer/filebeat/examples/security/values.yaml b/sregym/observer/filebeat/examples/security/values.yaml new file mode 100644 index 0000000..160dd1e --- /dev/null +++ b/sregym/observer/filebeat/examples/security/values.yaml @@ -0,0 +1,43 @@ +daemonset: + extraEnvs: + - name: "ELASTICSEARCH_HOSTS" + value: "security-master:9200" + - name: "ELASTICSEARCH_USERNAME" + valueFrom: + secretKeyRef: + name: security-master-credentials + key: username + - name: "ELASTICSEARCH_PASSWORD" + valueFrom: + secretKeyRef: + name: security-master-credentials + key: password + filebeatConfig: + filebeat.yml: | + filebeat.inputs: + - type: container + paths: + - /var/log/containers/*.log + processors: + - add_kubernetes_metadata: + host: ${NODE_NAME} + matchers: + - logs_path: + logs_path: "/var/log/containers/" + + output.elasticsearch: + host: '${NODE_NAME}' + hosts: '${ELASTICSEARCH_HOSTS:elasticsearch-master:9200}' + username: '${ELASTICSEARCH_USERNAME}' + password: '${ELASTICSEARCH_PASSWORD}' + protocol: https + ssl.certificate_authorities: + - /usr/share/filebeat/config/certs/elastic-certificate.pem + secretMounts: + - name: elastic-certificate-pem + secretName: elastic-certificate-pem + path: /usr/share/filebeat/config/certs + resources: + limits: + # Should avoid OOM (Error 137) when running goss tests into the pod + memory: "300Mi" diff --git a/sregym/observer/filebeat/examples/upgrade/Makefile b/sregym/observer/filebeat/examples/upgrade/Makefile new file mode 100644 index 0000000..c583cfa --- /dev/null +++ b/sregym/observer/filebeat/examples/upgrade/Makefile @@ -0,0 +1,17 @@ +default: test + +include ../../../helpers/examples.mk + +CHART := filebeat +RELEASE := helm-filebeat-upgrade +# upgrade from versions before 7.17.1 isn't compatible with 8.x +FROM := 7.17.1 + +install: + ../../../helpers/upgrade.sh --chart $(CHART) --release $(RELEASE) --from $(FROM) + kubectl rollout status daemonset $(RELEASE)-filebeat + +test: install goss + +purge: + helm del $(RELEASE) diff --git a/sregym/observer/filebeat/examples/upgrade/README.md b/sregym/observer/filebeat/examples/upgrade/README.md new file mode 100644 index 0000000..a5c06d3 --- /dev/null +++ b/sregym/observer/filebeat/examples/upgrade/README.md @@ -0,0 +1,21 @@ +# Upgrade + +This example will deploy Filebeat chart using an old chart version, +then upgrade it. 
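+
+Under the hood, the `make install` target (see Usage below) drives the `upgrade.sh` helper. A rough manual sketch of the same flow is shown here, assuming the `elastic` Helm repo has already been added; the release name and `--from` version come from the Makefile above, and the authoritative steps live in the helper script:
+
+```
+# Install the previous chart version (7.17.1) from the Elastic Helm repo
+helm install helm-filebeat-upgrade elastic/filebeat --version 7.17.1 --values values.yaml
+
+# Upgrade the release in place to the local development chart
+helm upgrade --wait helm-filebeat-upgrade ../../ --values values.yaml
+kubectl rollout status daemonset helm-filebeat-upgrade-filebeat
+```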
+ + +## Usage + +* Add the Elastic Helm charts repo: `helm repo add elastic https://helm.elastic.co` + +* Deploy [Elasticsearch Helm chart][]: `helm install elasticsearch elastic/elasticsearch` + +* Deploy and upgrade Filebeat chart with the default values: `make install` + + +## Testing + +You can also run [goss integration tests][] using `make test`. + + +[goss integration tests]: https://github.com/elastic/helm-charts/tree/main/filebeat/examples/upgrade/test/goss.yaml diff --git a/sregym/observer/filebeat/examples/upgrade/test/goss.yaml b/sregym/observer/filebeat/examples/upgrade/test/goss.yaml new file mode 100644 index 0000000..cd84525 --- /dev/null +++ b/sregym/observer/filebeat/examples/upgrade/test/goss.yaml @@ -0,0 +1,48 @@ +port: + tcp:5066: + listening: true + ip: + - "127.0.0.1" + +mount: + /usr/share/filebeat/data: + exists: true + /run/docker.sock: + exists: true + /var/lib/docker/containers: + exists: true + opts: + - ro + /usr/share/filebeat/filebeat.yml: + exists: true + opts: + - ro + +user: + filebeat: + exists: true + uid: 1000 + gid: 1000 + +http: + https://upgrade-master:9200/_cat/indices: + status: 200 + allow-insecure: true + timeout: 2000 + body: + - "filebeat-8.7.1" + username: "{{ .Env.ELASTICSEARCH_USERNAME }}" + password: "{{ .Env.ELASTICSEARCH_PASSWORD }}" + +file: + /usr/share/filebeat/filebeat.yml: + exists: true + contains: + - "add_kubernetes_metadata" + - "output.elasticsearch" + +command: + cd /usr/share/filebeat && filebeat test output: + exit-status: 0 + stdout: + - "elasticsearch: https://upgrade-master:9200" diff --git a/sregym/observer/filebeat/examples/upgrade/values.yaml b/sregym/observer/filebeat/examples/upgrade/values.yaml new file mode 100644 index 0000000..025ddf2 --- /dev/null +++ b/sregym/observer/filebeat/examples/upgrade/values.yaml @@ -0,0 +1,43 @@ +extraEnvs: + - name: "ELASTICSEARCH_HOSTS" + value: "https://upgrade-master:9200" + - name: "ELASTICSEARCH_USERNAME" + valueFrom: + secretKeyRef: + name: upgrade-master-credentials + key: username + - name: "ELASTICSEARCH_PASSWORD" + valueFrom: + secretKeyRef: + name: upgrade-master-credentials + key: password + - name: ssl.certificate_authorities + value: "/usr/share/filebeat/certs/ca.crt" +filebeatConfig: + filebeat.yml: | + filebeat.inputs: + - type: container + paths: + - /var/log/containers/*.log + processors: + - add_kubernetes_metadata: + host: ${NODE_NAME} + matchers: + - logs_path: + logs_path: "/var/log/containers/" + output.elasticsearch: + host: '${NODE_NAME}' + hosts: '${ELASTICSEARCH_HOSTS:upgrade-master:9200}' + username: '${ELASTICSEARCH_USERNAME}' + password: '${ELASTICSEARCH_PASSWORD}' + protocol: https + ssl.certificate_authorities: + - /usr/share/filebeat/certs/ca.crt +secretMounts: + - name: upgrade-master-certs + secretName: upgrade-master-certs + path: /usr/share/filebeat/certs +resources: + limits: + # Should avoid OOM (Error 137) when running goss tests into the pod + memory: "300Mi" diff --git a/sregym/observer/filebeat/templates/NOTES.txt b/sregym/observer/filebeat/templates/NOTES.txt new file mode 100644 index 0000000..e097e87 --- /dev/null +++ b/sregym/observer/filebeat/templates/NOTES.txt @@ -0,0 +1,2 @@ +1. Watch all containers come up. + $ kubectl get pods --namespace={{ .Release.Namespace }} -l app={{ template "filebeat.fullname" . 
}} -w diff --git a/sregym/observer/filebeat/templates/_helpers.tpl b/sregym/observer/filebeat/templates/_helpers.tpl new file mode 100644 index 0000000..3e5627f --- /dev/null +++ b/sregym/observer/filebeat/templates/_helpers.tpl @@ -0,0 +1,32 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. +*/}} +{{- define "filebeat.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +*/}} +{{- define "filebeat.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} + +{{/* +Use the fullname if the serviceAccount value is not set +*/}} +{{- define "filebeat.serviceAccount" -}} +{{- if .Values.serviceAccount }} +{{- .Values.serviceAccount -}} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} diff --git a/sregym/observer/filebeat/templates/clusterrole.yaml b/sregym/observer/filebeat/templates/clusterrole.yaml new file mode 100644 index 0000000..762afec --- /dev/null +++ b/sregym/observer/filebeat/templates/clusterrole.yaml @@ -0,0 +1,12 @@ +{{- if .Values.managedServiceAccount }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ template "filebeat.serviceAccount" . }}-cluster-role + labels: + app: "{{ template "filebeat.fullname" . }}" + chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} +rules: {{ toYaml .Values.clusterRoleRules | nindent 2 -}} +{{- end -}} diff --git a/sregym/observer/filebeat/templates/clusterrolebinding.yaml b/sregym/observer/filebeat/templates/clusterrolebinding.yaml new file mode 100644 index 0000000..ed41773 --- /dev/null +++ b/sregym/observer/filebeat/templates/clusterrolebinding.yaml @@ -0,0 +1,19 @@ +{{- if .Values.managedServiceAccount }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ template "filebeat.serviceAccount" . }}-cluster-role-binding + labels: + app: "{{ template "filebeat.fullname" . }}" + chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} +roleRef: + kind: ClusterRole + name: {{ template "filebeat.serviceAccount" . }}-cluster-role + apiGroup: rbac.authorization.k8s.io +subjects: +- kind: ServiceAccount + name: {{ template "filebeat.serviceAccount" . }} + namespace: {{ .Release.Namespace }} +{{- end -}} diff --git a/sregym/observer/filebeat/templates/configmap.yaml b/sregym/observer/filebeat/templates/configmap.yaml new file mode 100644 index 0000000..56b5cb3 --- /dev/null +++ b/sregym/observer/filebeat/templates/configmap.yaml @@ -0,0 +1,53 @@ +{{- if .Values.filebeatConfig }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "filebeat.fullname" . }}-config + labels: + app: "{{ template "filebeat.fullname" . 
}}" + chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} +data: +{{- range $path, $config := .Values.filebeatConfig }} + {{ $path }}: | +{{ $config | indent 4 -}} +{{- end -}} +{{- end -}} + +{{- if and .Values.daemonset.enabled .Values.daemonset.filebeatConfig }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "filebeat.fullname" . }}-daemonset-config + labels: + app: "{{ template "filebeat.fullname" . }}" + chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} +data: +{{- range $path, $config := .Values.daemonset.filebeatConfig }} + {{ $path }}: | +{{ $config | indent 4 -}} +{{- end -}} +{{- end -}} + +{{- if and .Values.deployment.enabled .Values.deployment.filebeatConfig }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "filebeat.fullname" . }}-deployment-config + labels: + app: "{{ template "filebeat.fullname" . }}" + chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} +data: +{{- range $path, $config := .Values.deployment.filebeatConfig }} + {{ $path }}: | +{{ $config | indent 4 -}} +{{- end -}} +{{- end -}} diff --git a/sregym/observer/filebeat/templates/daemonset.yaml b/sregym/observer/filebeat/templates/daemonset.yaml new file mode 100644 index 0000000..71ddc1c --- /dev/null +++ b/sregym/observer/filebeat/templates/daemonset.yaml @@ -0,0 +1,201 @@ +{{- if .Values.daemonset.enabled }} +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: {{ template "filebeat.fullname" . }} + labels: + app: "{{ template "filebeat.fullname" . }}" + chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} + {{- if .Values.daemonset.labels }} + {{- range $key, $value := .Values.daemonset.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- else }} + {{- range $key, $value := .Values.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- end }} + {{- if .Values.daemonset.annotations }} + annotations: + {{- range $key, $value := .Values.daemonset.annotations }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- end }} +spec: + selector: + matchLabels: + app: "{{ template "filebeat.fullname" . }}" + release: {{ .Release.Name | quote }} + updateStrategy: + {{- if eq .Values.updateStrategy "RollingUpdate" }} + rollingUpdate: + maxUnavailable: {{ .Values.daemonset.maxUnavailable }} + {{- end }} + type: {{ .Values.updateStrategy }} + template: + metadata: + annotations: + {{- range $key, $value := .Values.podAnnotations }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{/* This forces a restart if the configmap has changed */}} + {{- if or .Values.filebeatConfig .Values.daemonset.filebeatConfig }} + configChecksum: {{ include (print .Template.BasePath "/configmap.yaml") . | sha256sum | trunc 63 }} + {{- end }} + name: "{{ template "filebeat.fullname" . }}" + labels: + app: "{{ template "filebeat.fullname" . 
}}" + chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} + {{- if .Values.daemonset.labels }} + {{- range $key, $value := .Values.daemonset.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- else }} + {{- range $key, $value := .Values.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- end }} + spec: + tolerations: {{ toYaml ( .Values.tolerations | default .Values.daemonset.tolerations ) | nindent 8 }} + nodeSelector: {{ toYaml ( .Values.nodeSelector | default .Values.daemonset.nodeSelector ) | nindent 8 }} + {{- if .Values.priorityClassName }} + priorityClassName: {{ .Values.priorityClassName }} + {{- end }} + affinity: {{ toYaml ( .Values.affinity | default .Values.daemonset.affinity ) | nindent 8 }} + serviceAccountName: {{ template "filebeat.serviceAccount" . }} + terminationGracePeriodSeconds: {{ .Values.terminationGracePeriod }} + {{- if .Values.daemonset.hostNetworking }} + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet + {{- end }} + {{- if .Values.dnsConfig }} + dnsConfig: {{ toYaml .Values.dnsConfig | nindent 8 }} + {{- end }} + {{- if .Values.hostAliases | default .Values.daemonset.hostAliases }} + hostAliases: {{ toYaml ( .Values.hostAliases | default .Values.daemonset.hostAliases ) | nindent 8 }} + {{- end }} + volumes: + {{- range .Values.secretMounts | default .Values.daemonset.secretMounts }} + - name: {{ .name }} + secret: + secretName: {{ .secretName }} + {{- end }} + {{- if .Values.filebeatConfig }} + - name: filebeat-config + configMap: + defaultMode: 0600 + name: {{ template "filebeat.fullname" . }}-config + {{- else if .Values.daemonset.filebeatConfig }} + - name: filebeat-config + configMap: + defaultMode: 0600 + name: {{ template "filebeat.fullname" . }}-daemonset-config + {{- end }} + - name: data + hostPath: + path: {{ .Values.hostPathRoot }}/{{ template "filebeat.fullname" . }}-{{ .Release.Namespace }}-data + type: DirectoryOrCreate + - name: varlibdockercontainers + hostPath: + path: /var/lib/docker/containers + - name: varlog + hostPath: + path: /var/log + - name: varrundockersock + hostPath: + path: /var/run/docker.sock + {{- if .Values.extraVolumes | default .Values.daemonset.extraVolumes }} +{{ toYaml ( .Values.extraVolumes | default .Values.daemonset.extraVolumes ) | indent 6 }} + {{- end }} + {{- if .Values.imagePullSecrets }} + imagePullSecrets: +{{ toYaml .Values.imagePullSecrets | indent 8 }} + {{- end }} + {{- if .Values.extraInitContainers }} + initContainers: + # All the other beats accept a string here while + # filebeat accepts a valid yaml array. We're keeping + # this as a backwards compatible change, while adding + # also a way to pass a string as other templates to + # make these implementations consistent. + # https://github.com/elastic/helm-charts/issues/490 + {{- if eq "string" (printf "%T" .Values.extraInitContainers) }} +{{ tpl .Values.extraInitContainers . 
| indent 8 }} + {{- else }} +{{ toYaml .Values.extraInitContainers | indent 8 }} + {{- end }} + {{- end }} + containers: + - name: "filebeat" + image: "{{ .Values.image }}:{{ .Values.imageTag }}" + imagePullPolicy: "{{ .Values.imagePullPolicy }}" + args: + - "-e" + - "-E" + - "http.enabled=true" + livenessProbe: +{{ toYaml .Values.livenessProbe | indent 10 }} + readinessProbe: +{{ toYaml .Values.readinessProbe | indent 10 }} + resources: +{{ toYaml ( .Values.resources | default .Values.daemonset.resources ) | indent 10 }} + env: + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName +{{- if .Values.extraEnvs | default .Values.daemonset.extraEnvs }} +{{ toYaml ( .Values.extraEnvs | default .Values.daemonset.extraEnvs ) | indent 8 }} +{{- end }} + envFrom: {{ toYaml ( .Values.envFrom | default .Values.daemonset.envFrom ) | nindent 10 }} + securityContext: {{ toYaml ( .Values.podSecurityContext | default .Values.daemonset.securityContext ) | nindent 10 }} + volumeMounts: + {{- range .Values.secretMounts | default .Values.daemonset.secretMounts }} + - name: {{ .name }} + mountPath: {{ .path }} + {{- if .subPath }} + subPath: {{ .subPath }} + {{- end }} + {{- end }} + {{- range $path, $config := .Values.filebeatConfig }} + - name: filebeat-config + mountPath: /usr/share/filebeat/{{ $path }} + readOnly: true + subPath: {{ $path }} + {{ else }} + {{- range $path, $config := .Values.daemonset.filebeatConfig }} + - name: filebeat-config + mountPath: /usr/share/filebeat/{{ $path }} + readOnly: true + subPath: {{ $path }} + {{- end }} + {{- end }} + - name: data + mountPath: /usr/share/filebeat/data + - name: varlibdockercontainers + mountPath: /var/lib/docker/containers + readOnly: true + - name: varlog + mountPath: /var/log + readOnly: true + # Necessary when using autodiscovery; avoid mounting it otherwise + # See: https://www.elastic.co/guide/en/beats/filebeat/master/configuration-autodiscover.html + - name: varrundockersock + mountPath: /var/run/docker.sock + readOnly: true + {{- if .Values.extraVolumeMounts | default .Values.daemonset.extraVolumeMounts }} +{{ toYaml (.Values.extraVolumeMounts | default .Values.daemonset.extraVolumeMounts ) | indent 8 }} + {{- end }} + {{- if .Values.extraContainers }} +{{ tpl .Values.extraContainers . | indent 6 }} + {{- end }} +{{- end }} diff --git a/sregym/observer/filebeat/templates/deployment.yaml b/sregym/observer/filebeat/templates/deployment.yaml new file mode 100644 index 0000000..414aa3f --- /dev/null +++ b/sregym/observer/filebeat/templates/deployment.yaml @@ -0,0 +1,157 @@ +# Deploy singleton instance in the whole cluster for some unique data sources, like aws input +{{- if .Values.deployment.enabled }} +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "filebeat.fullname" . }} + labels: + app: "{{ template "filebeat.fullname" . 
}}" + chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" + heritage: '{{ .Release.Service }}' + release: {{ .Release.Name }} + {{- if .Values.deployment.labels }} + {{- range $key, $value := .Values.deployment.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- else }} + {{- range $key, $value := .Values.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- end }} + {{- if .Values.deployment.annotations }} + annotations: + {{- range $key, $value := .Values.deployment.annotations }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- end }} +spec: + replicas: {{ .Values.replicas }} + selector: + matchLabels: + app: "{{ template "filebeat.fullname" . }}" + release: {{ .Release.Name | quote }} + template: + metadata: + annotations: + {{- range $key, $value := .Values.podAnnotations }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{/* This forces a restart if the configmap has changed */}} + {{- if or .Values.filebeatConfig .Values.deployment.filebeatConfig }} + configChecksum: {{ include (print .Template.BasePath "/configmap.yaml") . | sha256sum | trunc 63 }} + {{- end }} + labels: + app: '{{ template "filebeat.fullname" . }}' + chart: '{{ .Chart.Name }}-{{ .Chart.Version }}' + release: '{{ .Release.Name }}' + {{- if .Values.deployment.labels }} + {{- range $key, $value := .Values.deployment.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- else }} + {{- range $key, $value := .Values.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- end }} + spec: + affinity: {{ toYaml .Values.deployment.affinity | nindent 8 }} + nodeSelector: {{ toYaml .Values.deployment.nodeSelector | nindent 8 }} + tolerations: {{ toYaml ( .Values.tolerations | default .Values.deployment.tolerations ) | nindent 8 }} + {{- if .Values.priorityClassName }} + priorityClassName: {{ .Values.priorityClassName }} + {{- end }} + serviceAccountName: {{ template "filebeat.serviceAccount" . }} + terminationGracePeriodSeconds: {{ .Values.terminationGracePeriod }} + {{- if .Values.deployment.hostAliases }} + hostAliases: {{ toYaml .Values.deployment.hostAliases | nindent 8 }} + {{- end }} + volumes: + {{- range .Values.secretMounts | default .Values.deployment.secretMounts }} + - name: {{ .name }} + secret: + secretName: {{ .secretName }} + {{- end }} + {{- if .Values.filebeatConfig }} + - name: filebeat-config + configMap: + defaultMode: 0600 + name: {{ template "filebeat.fullname" . }}-config + {{- else if .Values.deployment.filebeatConfig }} + - name: filebeat-config + configMap: + defaultMode: 0600 + name: {{ template "filebeat.fullname" . }}-deployment-config + {{- end }} + {{- if .Values.extraVolumes | default .Values.deployment.extraVolumes }} +{{ toYaml ( .Values.extraVolumes | default .Values.deployment.extraVolumes ) | indent 6 }} + {{- end }} + {{- if .Values.imagePullSecrets }} + imagePullSecrets: +{{ toYaml .Values.imagePullSecrets | indent 8 }} + {{- end }} + {{- if .Values.extraInitContainers }} + initContainers: + # All the other beats accept a string here while + # filebeat accepts a valid yaml array. We're keeping + # this as a backwards compatible change, while adding + # also a way to pass a string as other templates to + # make these implementations consistent. + # https://github.com/elastic/helm-charts/issues/490 + {{- if eq "string" (printf "%T" .Values.extraInitContainers) }} +{{ tpl .Values.extraInitContainers . 
| indent 6 }} + {{- else }} +{{ toYaml .Values.extraInitContainers | indent 6 }} + {{- end }} + {{- end }} + containers: + - name: "filebeat" + image: "{{ .Values.image }}:{{ .Values.imageTag }}" + imagePullPolicy: "{{ .Values.imagePullPolicy }}" + args: + - "-e" + - "-E" + - "http.enabled=true" + livenessProbe: +{{ toYaml .Values.livenessProbe | indent 10 }} + readinessProbe: +{{ toYaml .Values.readinessProbe | indent 10 }} + resources: {{ toYaml ( .Values.resources | default .Values.deployment.resources ) | nindent 10 }} + env: + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace +{{- if .Values.extraEnvs | default .Values.deployment.extraEnvs }} +{{ toYaml ( .Values.extraEnvs | default .Values.deployment.extraEnvs ) | indent 8 }} +{{- end }} + envFrom: {{ toYaml ( .Values.envFrom | default .Values.deployment.envFrom ) | nindent 10 }} + securityContext: {{ toYaml ( .Values.podSecurityContext | default .Values.deployment.securityContext ) | nindent 10 }} + volumeMounts: + {{- range .Values.secretMounts | default .Values.deployment.secretMounts }} + - name: {{ .name }} + mountPath: {{ .path }} + {{- if .subPath }} + subPath: {{ .subPath }} + {{- end }} + {{- end }} + {{- range $path, $config := .Values.filebeatConfig }} + - name: filebeat-config + mountPath: /usr/share/filebeat/{{ $path }} + readOnly: true + subPath: {{ $path }} + {{ else }} + {{- range $path, $config := .Values.deployment.filebeatConfig }} + - name: filebeat-config + mountPath: /usr/share/filebeat/{{ $path }} + readOnly: true + subPath: {{ $path }} + {{- end }} + {{- end }} + {{- if .Values.extraVolumeMounts | default .Values.deployment.extraVolumeMounts }} +{{ toYaml ( .Values.extraVolumeMounts | default .Values.deployment.extraVolumeMounts ) | indent 8 }} + {{- end }} + {{- if .Values.extraContainers }} +{{ tpl .Values.extraContainers . | indent 6 }} + {{- end }} +{{- end }} diff --git a/sregym/observer/filebeat/templates/role.yaml b/sregym/observer/filebeat/templates/role.yaml new file mode 100644 index 0000000..2b09ec2 --- /dev/null +++ b/sregym/observer/filebeat/templates/role.yaml @@ -0,0 +1,14 @@ +{{- if .Values.managedServiceAccount }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ template "filebeat.serviceAccount" . }}-role + labels: + app: "{{ template "filebeat.fullname" . }}" +rules: + - apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: ["get", "create", "update"] +{{- end -}} diff --git a/sregym/observer/filebeat/templates/rolebinding.yaml b/sregym/observer/filebeat/templates/rolebinding.yaml new file mode 100644 index 0000000..f6b9cf8 --- /dev/null +++ b/sregym/observer/filebeat/templates/rolebinding.yaml @@ -0,0 +1,19 @@ +{{- if .Values.managedServiceAccount }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ template "filebeat.serviceAccount" . }}-role-binding + labels: + app: "{{ template "filebeat.fullname" . }}" + chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} +roleRef: + kind: Role + name: {{ template "filebeat.serviceAccount" . }}-role + apiGroup: rbac.authorization.k8s.io +subjects: +- kind: ServiceAccount + name: {{ template "filebeat.serviceAccount" . 
}} + namespace: {{ .Release.Namespace }} +{{- end -}} diff --git a/sregym/observer/filebeat/templates/serviceaccount.yaml b/sregym/observer/filebeat/templates/serviceaccount.yaml new file mode 100644 index 0000000..8e497b3 --- /dev/null +++ b/sregym/observer/filebeat/templates/serviceaccount.yaml @@ -0,0 +1,15 @@ +{{- if .Values.managedServiceAccount }} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "filebeat.serviceAccount" . }} + annotations: + {{- with .Values.serviceAccountAnnotations }} + {{- toYaml . | nindent 4 }} + {{- end }} + labels: + app: "{{ template "filebeat.fullname" . }}" + chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} +{{- end -}} diff --git a/sregym/observer/filebeat/values.yaml b/sregym/observer/filebeat/values.yaml new file mode 100644 index 0000000..461a9a4 --- /dev/null +++ b/sregym/observer/filebeat/values.yaml @@ -0,0 +1,279 @@ +--- +daemonset: + # Annotations to apply to the daemonset + annotations: {} + # additionals labels + labels: {} + affinity: {} + # Include the daemonset + enabled: true + # Extra environment variables for Filebeat container. + envFrom: [] + # - configMapRef: + # name: config-secret + extraEnvs: + - name: "ELASTICSEARCH_USERNAME" + valueFrom: + secretKeyRef: + name: es-username + # name: elasticsearch-master-credentials + key: username + - name: "ELASTICSEARCH_PASSWORD" + valueFrom: + secretKeyRef: + # name: elasticsearch-master-credentials + name: es-password + key: password + # Allows you to add any config files in /usr/share/filebeat + extraVolumes: [] + # - name: extras + # emptyDir: {} + extraVolumeMounts: [] + # - name: extras + # mountPath: /usr/share/extras + # readOnly: true + hostNetworking: false + # Allows you to add any config files in /usr/share/filebeat + # such as filebeat.yml for daemonset + filebeatConfig: + filebeat.yml: | + filebeat.autodiscover: + providers: + - type: kubernetes + node: ${NODE_NAME} + templates: + - condition: + contains: + kubernetes.namespace: "hotel-reservation" + config: + - type: container + paths: + - /var/log/containers/*-${data.kubernetes.container.id}.log + processors: + - add_kubernetes_metadata: + host: ${NODE_NAME} + matchers: + - logs_path: + logs_path: "/var/log/containers/" + output.logstash: + hosts: ["logstash-logstash:5044"] + + # filebeat.config.modules: + # path: ${path.config}/modules.d/*.yml + # reload.enabled: false + # filebeat.config.modules: + # path: ${path.config}/modules.d/*.yml + # reload.enabled: false + # filebeat.inputs: + # - type: container + # paths: + # - /var/log/containers/*.log + # processors: + # - add_kubernetes_metadata: + # host: ${NODE_NAME} + # matchers: + # - logs_path: + # logs_path: "/var/log/containers/" + # Only used when updateStrategy is set to "RollingUpdate" + maxUnavailable: 1 + nodeSelector: {} + # A list of secrets and their paths to mount inside the pod + # This is useful for mounting certificates for security other sensitive values + # secretMounts: + # - name: elasticsearch-master-certs + # secretName: elasticsearch-master-certs + # path: /usr/share/filebeat/certs/ + + # - name: filebeat-certificates + # secretName: filebeat-certificates + # path: /usr/share/filebeat/certs + # Various pod security context settings. Bear in mind that many of these have an impact on Filebeat functioning properly. + # + # - User that the container will execute as. Typically necessary to run as root (0) in order to properly collect host container logs. 
+ # - Whether to execute the Filebeat containers as privileged containers. Typically not necessarily unless running within environments such as OpenShift. + securityContext: + runAsUser: 0 + privileged: false + resources: + requests: + cpu: "400m" + memory: "500Mi" + limits: + cpu: "1000m" + memory: "2000Mi" + tolerations: [] + +deployment: + # Annotations to apply to the deployment + annotations: {} + # additionals labels + labels: {} + affinity: {} + # Include the deployment + enabled: false + # Extra environment variables for Filebeat container. + envFrom: [] + # - configMapRef: + # name: config-secret + extraEnvs: + - name: "ELASTICSEARCH_USERNAME" + valueFrom: + secretKeyRef: + name: elasticsearch-master-credentials + key: username + - name: "ELASTICSEARCH_PASSWORD" + valueFrom: + secretKeyRef: + name: elasticsearch-master-credentials + key: password + # Allows you to add any config files in /usr/share/filebeat + extraVolumes: [] + # - name: extras + # emptyDir: {} + extraVolumeMounts: [] + # - name: extras + # mountPath: /usr/share/extras + # readOnly: true + # such as filebeat.yml for deployment + filebeatConfig: + filebeat.yml: | + filebeat.inputs: + - type: log + paths: + - /usr/share/filebeat/logs/filebeat + fields: + sample_rate: 0.1 + + output.elasticsearch: + host: "${NODE_NAME}" + hosts: '["https://${ELASTICSEARCH_HOSTS:elasticsearch-master:9200}"]' + username: "${ELASTICSEARCH_USERNAME}" + password: "${ELASTICSEARCH_PASSWORD}" + protocol: https + ssl.verification_mode: none + ssl.certificate_authorities: ["/usr/share/filebeat/certs/ca.crt"] + nodeSelector: {} + # A list of secrets and their paths to mount inside the pod + # This is useful for mounting certificates for security other sensitive values + secretMounts: + - name: elasticsearch-master-certs + secretName: elasticsearch-master-certs + path: /usr/share/filebeat/certs/ + # - name: filebeat-certificates + # secretName: filebeat-certificates + # path: /usr/share/filebeat/certs + # + # - User that the container will execute as. + # Not necessary to run as root (0) as the Filebeat Deployment use cases do not need access to Kubernetes Node internals + # - Typically not necessarily unless running within environments such as OpenShift. + securityContext: + runAsUser: 0 + privileged: false + resources: + requests: + cpu: "100m" + memory: "100Mi" + limits: + cpu: "1000m" + memory: "200Mi" + tolerations: [] + +# Replicas being used for the filebeat deployment +replicas: 1 + +extraContainers: "" +# - name: dummy-init +# image: busybox +# command: ['echo', 'hey'] + +extraInitContainers: [] +# - name: dummy-init + +# Root directory where Filebeat will write data to in order to persist registry data across pod restarts (file position and other metadata). +hostPathRoot: /var/lib + +dnsConfig: {} +# options: +# - name: ndots +# value: "2" +hostAliases: [] +#- ip: "127.0.0.1" +# hostnames: +# - "foo.local" +# - "bar.local" +image: "docker.elastic.co/beats/filebeat" +imageTag: "8.7.1" +imagePullPolicy: "IfNotPresent" +imagePullSecrets: [] + +livenessProbe: + exec: + command: + - sh + - -c + - | + #!/usr/bin/env bash -e + curl --fail 127.0.0.1:5066 + failureThreshold: 3 + initialDelaySeconds: 10 + periodSeconds: 10 + timeoutSeconds: 5 + +readinessProbe: + exec: + command: + - sh + - -c + - | + #!/usr/bin/env bash -e + filebeat test output + failureThreshold: 3 + initialDelaySeconds: 10 + periodSeconds: 10 + timeoutSeconds: 5 + +# Whether this chart should self-manage its service account, role, and associated role binding. 
+managedServiceAccount: true + +clusterRoleRules: + - apiGroups: + - "" + resources: + - namespaces + - nodes + - pods + verbs: + - get + - list + - watch + - apiGroups: + - "apps" + resources: + - replicasets + verbs: + - get + - list + - watch + +podAnnotations: {} +# iam.amazonaws.com/role: es-cluster + +# Custom service account override that the pod will use +serviceAccount: "" + +# Annotations to add to the ServiceAccount that is created if the serviceAccount value isn't set. +serviceAccountAnnotations: {} +# eks.amazonaws.com/role-arn: arn:aws:iam::111111111111:role/k8s.clustername.namespace.serviceaccount + +# How long to wait for Filebeat pods to stop gracefully +terminationGracePeriod: 30 +# This is the PriorityClass settings as defined in +# https://kubernetes.io/docs/concepts/configuration/pod-priority-preemption/#priorityclass +priorityClassName: "" + +updateStrategy: RollingUpdate + +# Override various naming aspects of this chart +# Only edit these if you know what you're doing +nameOverride: "" +fullnameOverride: "" diff --git a/sregym/observer/logstash/.helmignore b/sregym/observer/logstash/.helmignore new file mode 100644 index 0000000..516d3f8 --- /dev/null +++ b/sregym/observer/logstash/.helmignore @@ -0,0 +1,2 @@ +tests/ +.pytest_cache/ diff --git a/sregym/observer/logstash/Chart.yaml b/sregym/observer/logstash/Chart.yaml new file mode 100644 index 0000000..063beb1 --- /dev/null +++ b/sregym/observer/logstash/Chart.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +description: Official Elastic helm chart for Logstash +home: https://github.com/elastic/helm-charts +maintainers: + - email: helm-charts@elastic.co + name: Elastic +name: logstash +version: 8.7.1 +appVersion: 8.7.1 +sources: + - https://github.com/elastic/logstash +icon: https://helm.elastic.co/icons/logstash.png diff --git a/sregym/observer/logstash/Makefile b/sregym/observer/logstash/Makefile new file mode 100644 index 0000000..143a1d9 --- /dev/null +++ b/sregym/observer/logstash/Makefile @@ -0,0 +1 @@ +include ../helpers/common.mk diff --git a/sregym/observer/logstash/README.md b/sregym/observer/logstash/README.md new file mode 100644 index 0000000..f3b7df5 --- /dev/null +++ b/sregym/observer/logstash/README.md @@ -0,0 +1,261 @@ +* Install ElasticSearch +* Get the Elastic password: +```shell +sudo /usr/share/elasticsearch/bin/elasticsearch-reset-password -u elastic +``` +* Set the secret of elasticsearch in K8S: +```shell +kubectl create secret generic es-username --from-literal=username=elastic -n observe +kubectl create secret generic es-password --from-literal=password=YOUR_PASSWORD -n observe +``` + +* Install: +```shell +cd examples/elasticsearch/ +helm install logstash -n observe --values=values.yaml ../../ +``` + +# Logstash Helm Chart + +[![Build Status](https://img.shields.io/jenkins/s/https/devops-ci.elastic.co/job/elastic+helm-charts+main.svg)](https://devops-ci.elastic.co/job/elastic+helm-charts+main/) [![Artifact HUB](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/elastic)](https://artifacthub.io/packages/search?repo=elastic) + +This Helm chart is a lightweight way to configure and run our official +[Logstash Docker image][]. + +> **Warning** +> When it comes to running the Elastic on Kubernetes infrastructure, we +> recommend [Elastic Cloud on Kubernetes][] (ECK) as the best way to run and manage +> the Elastic Stack. 
+> +> ECK offers many operational benefits for both our basic-tier and our +> enterprise-tier customers, such as spinning up cluster nodes that were lost on +> failed infrastructure, seamless upgrades, rolling cluster changes, and much +> much more. +> +> With the release of the Elastic Stack Helm charts for Elastic version 8.5.1, +> we are handing over the ongoing maintenance of our Elastic Stack Helm charts +> to the community and contributors. This repository will finally be archived +> after 6 months time. Elastic Stacks deployed on Kubernetes through Helm charts +> will still be fully supported under EOL limitations. +> +> Since we want to provide an even better experience for our customers by +> running the Elastic Stack on Kubernetes, we will continue maintaining the +> Helm charts applicable to ECK Custom Resources. These charts can be found in +> the [ECK repository][eck-charts]. +> +> Helm charts will currently be maintained for ECK Enterprise-tier customers, +> however, we encourage the community to engage with the existing Helm charts +> for the Elastic Stack and continue supporting their ongoing maintenance. +> +> See for more details. + + + + + +- [Requirements](#requirements) +- [Installing](#installing) + - [Install a released version using the Helm repository](#install-a-released-version-using-the-helm-repository) + - [Install a development version using the main branch](#install-a-development-version-using-the-main-branch) +- [Upgrading](#upgrading) +- [Usage notes](#usage-notes) +- [Configuration](#configuration) +- [FAQ](#faq) + - [How to install OSS version of Logstash?](#how-to-install-oss-version-of-logstash) + - [How to install plugins?](#how-to-install-plugins) +- [Contributing](#contributing) + + + + + + +## Requirements + +See [supported configurations][] for more details. + +## Installing + +### Install a released version using the Helm repository + +* Add the Elastic Helm charts repo: +`helm repo add elastic https://helm.elastic.co` + +* Install it: `helm install logstash elastic/logstash` + +### Install a development version using the main branch + +* Clone the git repo: `git clone git@github.com:elastic/helm-charts.git` + +* Install it: `helm install logstash ./helm-charts/logstash --set imageTag=8.5.1` + + +## Upgrading + +Please always check [CHANGELOG.md][] and [BREAKING_CHANGES.md][] before +upgrading to a new chart version. + + +## Usage notes + +* This repo includes several [examples][] of configurations that can be used +as a reference. They are also used in the automated testing of this chart +* Automated testing of this chart is currently only run against GKE (Google +Kubernetes Engine). +* The chart deploys a StatefulSet and by default will do an automated rolling +update of your cluster. It does this by waiting for the cluster health to become +green after each instance is updated. If you prefer to update manually you can +set `OnDelete` [updateStrategy][]. +* It is important to verify that the JVM heap size in `logstashJavaOpts` and to +set the CPU/Memory `resources` to something suitable for your cluster. +* We have designed this chart to be very un-opinionated about how to configure +Logstash. It exposes ways to set environment variables and mount secrets inside +of the container. Doing this makes it much easier for this chart to support +multiple versions with minimal changes. 
+* `logstash.yml` configuration files can be set either by a ConfigMap using
+`logstashConfig` in `values.yaml` or by environment variables using `extraEnvs`
+in `values.yaml`; however, the Logstash Docker image can't mix both methods, as
+defining settings with environment variables causes `logstash.yml` to be
+modified in place, while using a ConfigMap bind-mounts the same file (more
+details in this [note][]).
+* When overriding `logstash.yml`, `http.host: 0.0.0.0` should always be included
+to make the default probes work. If you need to restrict the HTTP API to
+127.0.0.1 with `http.host: 127.0.0.1`, the default probes should be disabled or
+overridden (see [values.yaml][] for the correct syntax).
+* An ingress is provided that can be used to expose the HTTP port. This can be
+useful for the [http input plugin][], for instance.
+
+
+## Configuration
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `antiAffinityTopologyKey` | The [anti-affinity][] topology key. By default this will prevent multiple Logstash nodes from running on the same Kubernetes node | `kubernetes.io/hostname` |
+| `antiAffinity` | Setting this to hard enforces the [anti-affinity][] rules. If it is set to soft it will be done "best effort". Other values will be ignored | `hard` |
+| `envFrom` | Templatable string to be passed to the [environment from variables][] which will be appended to the `envFrom:` definition for the container | `[]` |
+| `extraContainers` | Templatable string of additional containers to be passed to the `tpl` function | `[]` |
+| `extraEnvs` | Extra [environment variables][] which will be appended to the `env:` definition for the container | `[]` |
+| `extraInitContainers` | Templatable string of additional `initContainers` to be passed to the `tpl` function | `[]` |
+| `extraPorts` | An array of extra ports to open on the pod | `[]` |
+| `extraVolumeMounts` | Templatable string of additional `volumeMounts` to be passed to the `tpl` function | `[]` |
+| `extraVolumes` | Templatable string of additional `volumes` to be passed to the `tpl` function | `[]` |
+| `fullnameOverride` | Overrides the full name of the resources. If not set the name will default to "`.Release.Name`-`.Values.nameOverride or .Chart.Name`" | `""` |
+| `hostAliases` | Configurable [hostAliases][] | `[]` |
+| `httpPort` | The HTTP port that Kubernetes will use for the healthchecks and the service | `9600` |
+| `imagePullPolicy` | The Kubernetes [imagePullPolicy][] value | `IfNotPresent` |
+| `imagePullSecrets` | Configuration for [imagePullSecrets][] so that you can use a private registry for your image | `[]` |
+| `imageTag` | The Logstash Docker image tag | `8.5.1` |
+| `image` | The Logstash Docker image | `docker.elastic.co/logstash/logstash` |
+| `labels` | Configurable [labels][] applied to all Logstash pods | `{}` |
+| `ingress` | Configurable [ingress][] for external access to the Logstash HTTP port | see [values.yaml][] |
+| `lifecycle` | Allows you to add lifecycle configuration. See [values.yaml][] for an example of the formatting | `{}` |
+| `livenessProbe` | Configuration fields for the liveness [probe][] | see [values.yaml][] |
+| `logstashConfig` | Allows you to add any config files in `/usr/share/logstash/config/` such as `logstash.yml` and `log4j2.properties`. See [values.yaml][] for an example of the formatting | `{}` |
+| `logstashJavaOpts` | Java options for Logstash. This is where you should configure the JVM heap size | `-Xmx1g -Xms1g` |
+| `logstashPipeline` | Allows you to add any pipeline files in `/usr/share/logstash/pipeline/` | `{}` |
+| `logstashPatternDir` | Allows you to define a custom directory to store pattern files | `/usr/share/logstash/patterns/` |
+| `logstashPattern` | Allows you to add any pattern files in `logstashPatternDir` | `{}` |
+| `maxUnavailable` | The [maxUnavailable][] value for the pod disruption budget. By default this will prevent Kubernetes from having more than 1 unhealthy pod in the node group | `1` |
+| `nameOverride` | Overrides the chart name for resources. If not set the name will default to `.Chart.Name` | `""` |
+| `nodeAffinity` | Value for the [node affinity settings][] | `{}` |
+| `podAffinity` | Value for the [pod affinity settings][] | `{}` |
+| `nodeSelector` | Configurable [nodeSelector][] so that you can target specific nodes for your Logstash cluster | `{}` |
+| `persistence` | Enables a persistent volume for Logstash data | see [values.yaml][] |
+| `podAnnotations` | Configurable [annotations][] applied to all Logstash pods | `{}` |
+| `podManagementPolicy` | By default Kubernetes [deploys StatefulSets serially][]. This deploys them in parallel so that they can discover each other | `Parallel` |
+| `podSecurityContext` | Allows you to set the [securityContext][] for the pod | see [values.yaml][] |
+| `podSecurityPolicy` | Configuration for creating a pod security policy with minimal permissions to run this Helm chart with `create: true`. Can also be used to reference an external pod security policy with `name: "externalPodSecurityPolicy"` | see [values.yaml][] |
+| `priorityClassName` | The name of the [PriorityClass][]. No default is supplied as the PriorityClass must be created first | `""` |
+| `rbac` | Configuration for creating a role, role binding and service account as part of this Helm chart with `create: true`. Can also be used to reference an external service account with `serviceAccountName: "externalServiceAccountName"` | see [values.yaml][] |
+| `readinessProbe` | Configuration fields for the readiness [probe][] | see [values.yaml][] |
+| `replicas` | Kubernetes replica count for the StatefulSet (i.e. how many pods) | `1` |
+| `resources` | Allows you to set the [resources][] for the StatefulSet | see [values.yaml][] |
+| `schedulerName` | Name of the [alternate scheduler][] | `""` |
+| `secrets` | Allows you to easily create a secret from variables or files. To add a secret from a file, append the `.filepath` suffix to the secret's key; the file content will be base64-encoded. Useful for storing certificates and other secrets | see [values.yaml][] |
+| `secretMounts` | Allows you to easily mount a secret as a file inside the StatefulSet. Useful for mounting certificates and other secrets. See [values.yaml][] for an example | `[]` |
+| `securityContext` | Allows you to set the [securityContext][] for the container | see [values.yaml][] |
+| `service` | Configurable [service][] to expose the Logstash service | see [values.yaml][] |
+| `terminationGracePeriod` | The [terminationGracePeriod][] in seconds used when trying to stop the pod | `120` |
+| `tolerations` | Configurable [tolerations][] | `[]` |
+| `updateStrategy` | The [updateStrategy][] for the StatefulSet. By default Kubernetes will wait for the cluster to be green after upgrading each pod. Setting this to `OnDelete` will allow you to manually delete each pod during upgrades | `RollingUpdate` |
+| `volumeClaimTemplate` | Configuration for the [volumeClaimTemplate for StatefulSets][]. You will want to adjust the storage (default `30Gi`) and the `storageClassName` if you are using a different storage class | see [values.yaml][] |
+
+
+## FAQ
+
+### How to install OSS version of Logstash?
+
+Deploying the OSS version of Logstash can be done by setting the `image` value
+to the [Logstash OSS Docker image][].
+
+An example of a Logstash deployment using the OSS version can be found in
+[examples/oss][].
+
+### How to install plugins?
+
+The recommended way to install plugins into our Docker images is to create a
+[custom Docker image][].
+
+The Dockerfile would look something like:
+
+```
+ARG logstash_version
+FROM docker.elastic.co/logstash/logstash:${logstash_version}
+RUN bin/logstash-plugin install logstash-output-kafka
+```
+
+Then update the `image` value to point to your custom image.
+
+There are a couple of reasons we recommend this:
+
+1. Tying the availability of Logstash to the plugin download service is not
+something we recommend, especially in Kubernetes, where it is normal and
+expected for a container to be rescheduled onto another host at any time.
+2. Mutating the state of a running Docker image (by installing plugins) goes
+against the best practices of containers and immutable infrastructure.
+
+
+## Contributing
+
+Please check [CONTRIBUTING.md][] before any contribution or for any questions
+about our development and testing process.
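+
+Returning to the [Configuration](#configuration) parameters above, here is a
+minimal `values.yaml` sketch; the pipeline, port, and service values are
+illustrative examples, not chart defaults:
+
+```yaml
+# Sketch only: a simple Beats pipeline exposed through the chart's service.
+logstashConfig:
+  logstash.yml: |
+    http.host: 0.0.0.0
+logstashPipeline:
+  logstash.conf: |
+    input { beats { port => 5044 } }
+    output { stdout { codec => rubydebug } }
+extraPorts:
+  - name: beats
+    containerPort: 5044
+service:
+  annotations: {}
+  type: ClusterIP
+  ports:
+    - name: beats
+      port: 5044
+      protocol: TCP
+      targetPort: 5044
+```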
+ +[alternate scheduler]: https://kubernetes.io/docs/tasks/administer-cluster/configure-multiple-schedulers/#specify-schedulers-for-pods +[annotations]: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/ +[anti-affinity]: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity +[BREAKING_CHANGES.md]: https://github.com/elastic/helm-charts/blob/main/BREAKING_CHANGES.md +[CHANGELOG.md]: https://github.com/elastic/helm-charts/blob/main/CHANGELOG.md +[CONTRIBUTING.md]: https://github.com/elastic/helm-charts/blob/main/CONTRIBUTING.md +[custom docker image]: https://www.elastic.co/guide/en/logstash/current/docker-config.html#_custom_images +[deploys statefulsets serially]: https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#pod-management-policies +[eck-charts]: https://github.com/elastic/cloud-on-k8s/tree/master/deploy +[elastic cloud on kubernetes]: https://github.com/elastic/cloud-on-k8s +[environment from variables]: https://kubernetes.io/docs/tasks/configure-pod-container/configure-pod-configmap/#configure-all-key-value-pairs-in-a-configmap-as-container-environment-variables +[environment variables]: https://kubernetes.io/docs/tasks/inject-data-application/define-environment-variable-container/#using-environment-variables-inside-of-your-config +[examples]: https://github.com/elastic/helm-charts/tree/main/logstash/examples +[examples/oss]: https://github.com/elastic/helm-charts/tree/main/logstash/examples/oss +[hostAliases]: https://kubernetes.io/docs/concepts/services-networking/add-entries-to-pod-etc-hosts-with-host-aliases/ +[http input plugin]: https://www.elastic.co/guide/en/logstash/current/plugins-inputs-http.html +[imagePullPolicy]: https://kubernetes.io/docs/concepts/containers/images/#updating-images +[imagePullSecrets]: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/#create-a-pod-that-uses-your-secret +[ingress]: https://kubernetes.io/docs/concepts/services-networking/ingress/ +[labels]: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ +[logstash docker image]: https://www.elastic.co/guide/en/logstash/current/docker.html +[logstash oss docker image]: https://www.docker.elastic.co/r/logstash/logstash-oss +[maxUnavailable]: https://kubernetes.io/docs/tasks/run-application/configure-pdb/#specifying-a-poddisruptionbudget +[node affinity settings]: https://kubernetes.io/docs/tasks/configure-pod-container/assign-pods-nodes-using-node-affinity/ +[nodeSelector]: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector +[note]: https://www.elastic.co/guide/en/logstash/current/docker-config.html#docker-env-config +[pod affinity settings]: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#inter-pod-affinity-and-anti-affinity +[priorityClass]: https://kubernetes.io/docs/concepts/configuration/pod-priority-preemption/#priorityclass +[probe]: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/ +[resources]: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/ +[securityContext]: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-pod +[service]: https://kubernetes.io/docs/concepts/services-networking/service/ +[supported configurations]: https://github.com/elastic/helm-charts/tree/main/README.md#supported-configurations +[terminationGracePeriod]: 
https://kubernetes.io/docs/concepts/workloads/pods/pod/#termination-of-pods +[tolerations]: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/ +[updateStrategy]: https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/ +[values.yaml]: https://github.com/elastic/helm-charts/tree/main/logstash/values.yaml +[volumeClaimTemplate for statefulsets]: https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#stable-storage diff --git a/sregym/observer/logstash/examples/default/Makefile b/sregym/observer/logstash/examples/default/Makefile new file mode 100644 index 0000000..7721340 --- /dev/null +++ b/sregym/observer/logstash/examples/default/Makefile @@ -0,0 +1,14 @@ +default: test + +include ../../../helpers/examples.mk + +RELEASE := helm-logstash-default +TIMEOUT := 1200s + +install: + helm upgrade --wait --timeout=$(TIMEOUT) --install $(RELEASE) ../../ + +test: install goss + +purge: + helm del $(RELEASE) diff --git a/sregym/observer/logstash/examples/default/README.md b/sregym/observer/logstash/examples/default/README.md new file mode 100644 index 0000000..ec1b35c --- /dev/null +++ b/sregym/observer/logstash/examples/default/README.md @@ -0,0 +1,17 @@ +# Default + +This example deploy Logstash 8.5.1 using [default values][]. + + +## Usage + +* Deploy Logstash chart with the default values: `make install` + + +## Testing + +You can also run [goss integration tests][] using `make test` + + +[goss integration tests]: https://github.com/elastic/helm-charts/tree/main/logstash/examples/default/test/goss.yaml +[default values]: https://github.com/elastic/helm-charts/tree/main/logstash/values.yaml diff --git a/sregym/observer/logstash/examples/default/test/goss.yaml b/sregym/observer/logstash/examples/default/test/goss.yaml new file mode 100644 index 0000000..fe4dab3 --- /dev/null +++ b/sregym/observer/logstash/examples/default/test/goss.yaml @@ -0,0 +1,41 @@ +user: + logstash: + exists: true + uid: 1000 + gid: 1000 + +http: + http://localhost:9600?pretty: + status: 200 + timeout: 2000 + body: + - '"version" : "8.7.1"' + - '"http_address" : "0.0.0.0:9600"' + - '"status" : "green"' + - '"workers" : 1' + - '"batch_size" : 125' + - '"batch_delay" : 50' + +file: + /usr/share/logstash/config/logstash.yml: + exists: true + mode: "0644" + owner: logstash + group: root + filetype: file + contains: + - 'http.host: "0.0.0.0"' + - 'xpack.monitoring.elasticsearch.hosts: [ "http://elasticsearch:9200" ]' + /usr/share/logstash/pipeline/logstash.conf: + exists: true + mode: "0644" + owner: logstash + group: root + filetype: file + contains: + - "input {" + - "beats {" + - "port => 5044" + - "output {" + - "stdout {" + - "codec => rubydebug" diff --git a/sregym/observer/logstash/examples/elasticsearch/Makefile b/sregym/observer/logstash/examples/elasticsearch/Makefile new file mode 100644 index 0000000..e02406f --- /dev/null +++ b/sregym/observer/logstash/examples/elasticsearch/Makefile @@ -0,0 +1,15 @@ +default: test + +include ../../../helpers/examples.mk + +RELEASE := logstash +TIMEOUT := 1200s + +install: + helm upgrade -n observe --wait --timeout=$(TIMEOUT) --install --values values.yaml $(RELEASE) ../../ + +test: install goss + +purge: + helm del $(RELEASE) + kubectl delete $$(kubectl get pvc -l release=$(RELEASE) -o name) diff --git a/sregym/observer/logstash/examples/elasticsearch/README.md b/sregym/observer/logstash/examples/elasticsearch/README.md new file mode 100644 index 0000000..23595a9 --- /dev/null +++ 
b/sregym/observer/logstash/examples/elasticsearch/README.md @@ -0,0 +1,28 @@ +# Elasticsearch + +This example deploy Logstash 8.5.1 which connects to Elasticsearch (see +[values][]). + + +## Usage + +* Deploy [Elasticsearch Helm chart][]. + +* Deploy Logstash chart: `make install` + +* You can now setup a port forward to query Logstash indices: + + ``` + kubectl port-forward svc/elasticsearch-master 9200 + curl localhost:9200/_cat/indices + ``` + + +## Testing + +You can also run [goss integration tests][] using `make test` + + +[elasticsearch helm chart]: https://github.com/elastic/helm-charts/tree/main/elasticsearch/examples/default/ +[goss integration tests]: https://github.com/elastic/helm-charts/tree/main/logstash/examples/elasticsearch/test/goss.yaml +[values]: https://github.com/elastic/helm-charts/tree/main/logstash/examples/elasticsearch/values.yaml diff --git a/sregym/observer/logstash/examples/elasticsearch/test/goss.yaml b/sregym/observer/logstash/examples/elasticsearch/test/goss.yaml new file mode 100644 index 0000000..e5ae079 --- /dev/null +++ b/sregym/observer/logstash/examples/elasticsearch/test/goss.yaml @@ -0,0 +1,58 @@ +mount: + /usr/share/logstash/data: + exists: true + /usr/share/logstash/config/logstash.yml: + exists: true + opts: + - ro + /usr/share/logstash/pipeline/uptime.conf: + exists: true + opts: + - ro + +user: + logstash: + exists: true + uid: 1000 + gid: 1000 + +http: + http://localhost:9600?pretty: + status: 200 + timeout: 2000 + body: + - '"version" : "8.7.1"' + - '"http_address" : "0.0.0.0:9600"' + - '"status" : "green"' + - '"workers" : 1' + - '"batch_size" : 125' + - '"batch_delay" : 50' + https://elasticsearch-master:9200/_cat/indices: + allow-insecure: true + status: 200 + timeout: 2000 + username: "{{ .Env.ELASTICSEARCH_USERNAME }}" + password: "{{ .Env.ELASTICSEARCH_PASSWORD }}" + body: + - "logstash" + +file: + /usr/share/logstash/config/logstash.yml: + exists: true + mode: "0644" + owner: root + group: logstash + filetype: file + contains: + - "http.host: 0.0.0.0" + - "xpack.monitoring.enabled: false" + /usr/share/logstash/pipeline/uptime.conf: + exists: true + mode: "0644" + owner: root + group: logstash + filetype: file + contains: + - 'input { exec { command => "uptime" interval => 30 } }' + - 'hosts => ["https://elasticsearch-master:9200"]' + - 'index => "logstash"' diff --git a/sregym/observer/logstash/examples/elasticsearch/values.yaml b/sregym/observer/logstash/examples/elasticsearch/values.yaml new file mode 100644 index 0000000..c97a00e --- /dev/null +++ b/sregym/observer/logstash/examples/elasticsearch/values.yaml @@ -0,0 +1,53 @@ +persistence: + enabled: false + +logstashConfig: + logstash.yml: | + http.host: 0.0.0.0 + xpack.monitoring.enabled: false + config.reload.automatic: true + +logstashPipeline: + uptime.conf: | + output { + stdout { + codec => rubydebug + } + elasticsearch { + hosts => ["http://10.0.0.4:9200"] + user => '${ELASTICSEARCH_USERNAME}' + password => '${ELASTICSEARCH_PASSWORD}' + index => "logstash-%{+YYYY.MM.dd.HH}" + } + } +# cacert => '/usr/share/logstash/config/certs/ca.crt' +extraEnvs: + - name: "ELASTICSEARCH_USERNAME" + valueFrom: + secretKeyRef: + name: es-username + key: username + - name: "ELASTICSEARCH_PASSWORD" + valueFrom: + secretKeyRef: + name: es-password + key: password + +# secretMounts: +# - name: ca-config +# secretName: ca-config +# path: /usr/share/logstash/config/certs + + # - name: elasticsearch-master-certs + # secretName: elasticsearch-master-certs + # path: /usr/share/logstash/config/certs 
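+# NOTE: the extraEnvs above read credentials from Secrets named "es-username" and
+# "es-password"; these Secrets are assumed to already exist in the target namespace
+# (they are not created by this example), so adjust the names to match your
+# Elasticsearch deployment.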
+service: + name: logstash + type: ClusterIP + ports: + - name: beats + port: 5044 + targetPort: 5044 + selector: + app: logstash-logstash + diff --git a/sregym/observer/logstash/examples/oss/Makefile b/sregym/observer/logstash/examples/oss/Makefile new file mode 100644 index 0000000..b6a16a8 --- /dev/null +++ b/sregym/observer/logstash/examples/oss/Makefile @@ -0,0 +1,14 @@ +default: test + +include ../../../helpers/examples.mk + +RELEASE := helm-logstash-oss +TIMEOUT := 1200s + +install: + helm upgrade --wait --timeout=$(TIMEOUT) --install --values values.yaml $(RELEASE) ../../ + +test: install goss + +purge: + helm del $(RELEASE) diff --git a/sregym/observer/logstash/examples/oss/README.md b/sregym/observer/logstash/examples/oss/README.md new file mode 100644 index 0000000..e3309b7 --- /dev/null +++ b/sregym/observer/logstash/examples/oss/README.md @@ -0,0 +1,17 @@ +# OSS + +This example deploy Logstash 8.5.1 using [Logstash OSS][] version. + + +## Usage + +* Deploy Logstash chart with the default values: `make install` + + +## Testing + +You can also run [goss integration tests][] using `make test` + + +[logstash oss]: https://www.elastic.co/downloads/logstash-oss +[goss integration tests]: https://github.com/elastic/helm-charts/tree/main/logstash/examples/oss/test/goss.yaml diff --git a/sregym/observer/logstash/examples/oss/test/goss.yaml b/sregym/observer/logstash/examples/oss/test/goss.yaml new file mode 100644 index 0000000..9bc4438 --- /dev/null +++ b/sregym/observer/logstash/examples/oss/test/goss.yaml @@ -0,0 +1,40 @@ +user: + logstash: + exists: true + uid: 1000 + gid: 1000 + +http: + http://localhost:9600?pretty: + status: 200 + timeout: 2000 + body: + - '"version" : "8.7.1"' + - '"http_address" : "0.0.0.0:9600"' + - '"status" : "green"' + - '"workers" : 1' + - '"batch_size" : 125' + - '"batch_delay" : 50' + +file: + /usr/share/logstash/config/logstash.yml: + exists: true + mode: "0644" + owner: logstash + group: root + filetype: file + contains: + - 'http.host: "0.0.0.0"' + /usr/share/logstash/pipeline/logstash.conf: + exists: true + mode: "0644" + owner: logstash + group: root + filetype: file + contains: + - "input {" + - "beats {" + - "port => 5044" + - "output {" + - "stdout {" + - "codec => rubydebug" diff --git a/sregym/observer/logstash/examples/oss/values.yaml b/sregym/observer/logstash/examples/oss/values.yaml new file mode 100644 index 0000000..7717c0b --- /dev/null +++ b/sregym/observer/logstash/examples/oss/values.yaml @@ -0,0 +1,2 @@ +--- +image: "docker.elastic.co/logstash/logstash-oss" diff --git a/sregym/observer/logstash/examples/security/Makefile b/sregym/observer/logstash/examples/security/Makefile new file mode 100644 index 0000000..b2dbea0 --- /dev/null +++ b/sregym/observer/logstash/examples/security/Makefile @@ -0,0 +1,15 @@ +default: test + +include ../../../helpers/examples.mk + +RELEASE := helm-logstash-security +TIMEOUT := 1200s + +install: + helm upgrade --wait --timeout=$(TIMEOUT) --install --values values.yaml $(RELEASE) ../../ + +test: install goss + +purge: + helm del $(RELEASE) + kubectl delete $$(kubectl get pvc -l release=$(RELEASE) -o name) diff --git a/sregym/observer/logstash/examples/security/README.md b/sregym/observer/logstash/examples/security/README.md new file mode 100644 index 0000000..07880a5 --- /dev/null +++ b/sregym/observer/logstash/examples/security/README.md @@ -0,0 +1,28 @@ +# Security + +This example deploy Logstash 8.5.1 which connects to Elasticsearch using TLS +(see [values][]). 
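+
+The values for this example mount a CA certificate from a Secret named
+`elastic-certificate-crt` and read credentials from a Secret named
+`security-master-credentials`. If your Elasticsearch deployment does not
+already provide them, a minimal sketch of the certificate Secret (placeholder
+certificate content, assuming the CA file is available as
+`elastic-certificate.crt`) could look like:
+
+```yaml
+apiVersion: v1
+kind: Secret
+metadata:
+  name: elastic-certificate-crt
+type: Opaque
+stringData:
+  elastic-certificate.crt: |
+    -----BEGIN CERTIFICATE-----
+    ...placeholder...
+    -----END CERTIFICATE-----
+```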
+ + +## Usage + +* Deploy [Elasticsearch Helm chart with security][]. + +* Deploy Logstash chart: `make install` + +* You can now setup a port forward to query Logstash indices: + + ``` + kubectl port-forward svc/elasticsearch-master 9200 + curl localhost:9200/_cat/indices + ``` + + +## Testing + +You can also run [goss integration tests][] using `make test` + + +[elasticsearch helm chart with security]: https://github.com/elastic/helm-charts/tree/main/elasticsearch/examples/security/ +[goss integration tests]: https://github.com/elastic/helm-charts/tree/main/logstash/examples/security/test/goss.yaml +[values]: https://github.com/elastic/helm-charts/tree/main/logstash/examples/security/values.yaml diff --git a/sregym/observer/logstash/examples/security/test/goss.yaml b/sregym/observer/logstash/examples/security/test/goss.yaml new file mode 100644 index 0000000..8c1a938 --- /dev/null +++ b/sregym/observer/logstash/examples/security/test/goss.yaml @@ -0,0 +1,62 @@ +mount: + /usr/share/logstash/data: + exists: true + /usr/share/logstash/config/logstash.yml: + exists: true + opts: + - ro + /usr/share/logstash/pipeline/uptime.conf: + exists: true + opts: + - ro + +user: + logstash: + exists: true + uid: 1000 + gid: 1000 + +http: + http://localhost:9600?pretty: + status: 200 + timeout: 2000 + body: + - '"version" : "8.7.1"' + - '"http_address" : "0.0.0.0:9600"' + - '"status" : "green"' + - '"workers" : 1' + - '"batch_size" : 125' + - '"batch_delay" : 50' + https://security-master:9200/_cat/indices: + status: 200 + timeout: 2000 + body: + - "logstash" + allow-insecure: true + username: "{{ .Env.ELASTICSEARCH_USERNAME }}" + password: "{{ .Env.ELASTICSEARCH_PASSWORD }}" + +file: + /usr/share/logstash/config/logstash.yml: + exists: true + mode: "0644" + owner: root + group: logstash + filetype: file + contains: + - "http.host: 0.0.0.0" + - "xpack.monitoring.enabled: true" + - 'xpack.monitoring.elasticsearch.hosts: ["https://security-master:9200"]' + - "xpack.monitoring.elasticsearch.ssl.certificate_authority: /usr/share/logstash/config/certs/elastic-certificate.crt" + /usr/share/logstash/pipeline/uptime.conf: + exists: true + mode: "0644" + owner: root + group: logstash + filetype: file + contains: + - 'input { exec { command => "uptime" interval => 30 } }' + - "output { elasticsearch {" + - 'hosts => ["https://security-master:9200"]' + - 'cacert => "/usr/share/logstash/config/certs/elastic-certificate.crt"' + - 'index => "logstash"' diff --git a/sregym/observer/logstash/examples/security/values.yaml b/sregym/observer/logstash/examples/security/values.yaml new file mode 100644 index 0000000..bab3b06 --- /dev/null +++ b/sregym/observer/logstash/examples/security/values.yaml @@ -0,0 +1,40 @@ +persistence: + enabled: true + +logstashConfig: + logstash.yml: | + http.host: 0.0.0.0 + xpack.monitoring.enabled: true + xpack.monitoring.elasticsearch.username: '${ELASTICSEARCH_USERNAME}' + xpack.monitoring.elasticsearch.password: '${ELASTICSEARCH_PASSWORD}' + xpack.monitoring.elasticsearch.hosts: ["https://security-master:9200"] + xpack.monitoring.elasticsearch.ssl.certificate_authority: /usr/share/logstash/config/certs/elastic-certificate.crt + +logstashPipeline: + uptime.conf: | + input { exec { command => "uptime" interval => 30 } } + output { elasticsearch { + hosts => ["https://security-master:9200"] + cacert => "/usr/share/logstash/config/certs/elastic-certificate.crt" + user => '${ELASTICSEARCH_USERNAME}' + password => '${ELASTICSEARCH_PASSWORD}' + index => "logstash" + } + } + +secretMounts: + - name: 
elastic-certificate-crt + secretName: elastic-certificate-crt + path: /usr/share/logstash/config/certs + +extraEnvs: + - name: "ELASTICSEARCH_USERNAME" + valueFrom: + secretKeyRef: + name: security-master-credentials + key: username + - name: "ELASTICSEARCH_PASSWORD" + valueFrom: + secretKeyRef: + name: security-master-credentials + key: password diff --git a/sregym/observer/logstash/examples/upgrade/Makefile b/sregym/observer/logstash/examples/upgrade/Makefile new file mode 100644 index 0000000..0842215 --- /dev/null +++ b/sregym/observer/logstash/examples/upgrade/Makefile @@ -0,0 +1,16 @@ +default: test + +include ../../../helpers/examples.mk + +CHART := logstash +RELEASE := helm-logstash-upgrade +FROM := 7.9.0 # upgrade from version < 7.9.0 is failing due to headless service breaking change + +install: + ../../../helpers/upgrade.sh --chart $(CHART) --release $(RELEASE) --from $(FROM) + kubectl rollout status statefulset $(RELEASE)-logstash + +test: install goss + +purge: + helm del $(RELEASE) diff --git a/sregym/observer/logstash/examples/upgrade/README.md b/sregym/observer/logstash/examples/upgrade/README.md new file mode 100644 index 0000000..aafb023 --- /dev/null +++ b/sregym/observer/logstash/examples/upgrade/README.md @@ -0,0 +1,19 @@ +# Upgrade + +This example will deploy Logstash chart using an old chart version, +then upgrade it. + + +## Usage + +* Add the Elastic Helm charts repo: `helm repo add elastic https://helm.elastic.co` + +* Deploy and upgrade Logstash chart with the default values: `make install` + + +## Testing + +You can also run [goss integration tests][] using `make test`. + + +[goss integration tests]: https://github.com/elastic/helm-charts/tree/main/logstash/examples/upgrade/test/goss.yaml diff --git a/sregym/observer/logstash/examples/upgrade/test/goss.yaml b/sregym/observer/logstash/examples/upgrade/test/goss.yaml new file mode 100644 index 0000000..fe4dab3 --- /dev/null +++ b/sregym/observer/logstash/examples/upgrade/test/goss.yaml @@ -0,0 +1,41 @@ +user: + logstash: + exists: true + uid: 1000 + gid: 1000 + +http: + http://localhost:9600?pretty: + status: 200 + timeout: 2000 + body: + - '"version" : "8.7.1"' + - '"http_address" : "0.0.0.0:9600"' + - '"status" : "green"' + - '"workers" : 1' + - '"batch_size" : 125' + - '"batch_delay" : 50' + +file: + /usr/share/logstash/config/logstash.yml: + exists: true + mode: "0644" + owner: logstash + group: root + filetype: file + contains: + - 'http.host: "0.0.0.0"' + - 'xpack.monitoring.elasticsearch.hosts: [ "http://elasticsearch:9200" ]' + /usr/share/logstash/pipeline/logstash.conf: + exists: true + mode: "0644" + owner: logstash + group: root + filetype: file + contains: + - "input {" + - "beats {" + - "port => 5044" + - "output {" + - "stdout {" + - "codec => rubydebug" diff --git a/sregym/observer/logstash/examples/upgrade/values.yaml b/sregym/observer/logstash/examples/upgrade/values.yaml new file mode 100644 index 0000000..7313caa --- /dev/null +++ b/sregym/observer/logstash/examples/upgrade/values.yaml @@ -0,0 +1 @@ +--- diff --git a/sregym/observer/logstash/jaeger/jaeger.py b/sregym/observer/logstash/jaeger/jaeger.py new file mode 100644 index 0000000..6295171 --- /dev/null +++ b/sregym/observer/logstash/jaeger/jaeger.py @@ -0,0 +1,104 @@ +import os +import socket +import subprocess +import time +from pathlib import Path + + +class Jaeger: + def __init__(self): + self.namespace = "observe" + base_dir = Path(__file__).parent + self.config_file = base_dir / "jaeger.yaml" + self.port = 16686 # local port for 
Jaeger UI + self.port_forward_process = None + os.environ["JAEGER_BASE_URL"] = f"http://localhost:{self.port}" + + def run_cmd(self, cmd: str) -> str: + result = subprocess.run(cmd, shell=True, capture_output=True, text=True) + if result.returncode != 0: + raise Exception(f"Command failed: {cmd}\nError: {result.stderr}") + return result.stdout.strip() + + def deploy(self): + """Deploy Jaeger with TiDB as the storage backend.""" + self.run_cmd(f"kubectl apply -f {self.config_file} -n {self.namespace}") + self.wait_for_service("jaeger-out", timeout=120) + self.start_port_forward() + print("Jaeger deployed successfully.") + + def is_port_in_use(self, port: int) -> bool: + """Check if a local TCP port is already bound.""" + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + return s.connect_ex(("127.0.0.1", port)) == 0 + + def wait_for_service(self, service: str, timeout: int = 60): + """Wait until the Jaeger service exists in Kubernetes.""" + print(f"[debug] waiting for service {service} in ns={self.namespace}") + t0 = time.time() + while time.time() - t0 < timeout: + result = subprocess.run( + f"kubectl -n {self.namespace} get svc {service}", + shell=True, + capture_output=True, + text=True, + ) + if result.returncode == 0: + print(f"[debug] found service {service}") + return + time.sleep(3) + raise RuntimeError(f"Service {service} not found within {timeout}s") + + def start_port_forward(self): + """Starts port-forwarding to access Prometheus.""" + print("Start port-forwarding for Prometheus.") + if self.port_forward_process and self.port_forward_process.poll() is None: + print("Port-forwarding already active.") + return + + for attempt in range(3): + if self.is_port_in_use(self.port): + print(f"Port {self.port} is already in use. Attempt {attempt + 1} of 3. Retrying in 3 seconds...") + time.sleep(3) + continue + + command = f"kubectl port-forward svc/jaeger-out {self.port}:16686 -n observe" + self.port_forward_process = subprocess.Popen( + command, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + os.environ["JAEGER_PORT"] = str(self.port) + time.sleep(3) # Wait a bit for the port-forward to establish + + if self.port_forward_process.poll() is None: + print(f"Port forwarding established at {self.port}.") + os.environ["JAEGER_PORT"] = str(self.port) + break + else: + print("Port forwarding failed. 
Retrying...") + else: + print("Failed to establish port forwarding after multiple attempts.") + + def stop_port_forward(self): + """Stops the kubectl port-forward command and cleans up resources.""" + if self.port_forward_process: + self.port_forward_process.terminate() + try: + self.port_forward_process.wait(timeout=5) + except subprocess.TimeoutExpired: + print("Port-forward process did not terminate in time, killing...") + self.port_forward_process.kill() + + if self.port_forward_process.stdout: + self.port_forward_process.stdout.close() + if self.port_forward_process.stderr: + self.port_forward_process.stderr.close() + + print("Port forwarding stopped.") + + +if __name__ == "__main__": + jaeger = Jaeger() diff --git a/sregym/observer/logstash/jaeger/jaeger.yaml b/sregym/observer/logstash/jaeger/jaeger.yaml new file mode 100644 index 0000000..af86b92 --- /dev/null +++ b/sregym/observer/logstash/jaeger/jaeger.yaml @@ -0,0 +1,69 @@ +--- +apiVersion: v1 +kind: Service +metadata: + name: jaeger-out + labels: + app-name: jaeger + namespace: observe +spec: + type: NodePort + ports: + - port: 16686 + selector: + app-name: jaeger +--- +apiVersion: v1 +kind: Service +metadata: + name: jaeger-agent + labels: + app-name: jaeger + namespace: observe +spec: + ports: + - port: 5775 + name: udp1 + protocol: UDP + - port: 5778 + name: tcp1 + - port: 6831 + name: udp2 + protocol: UDP + - port: 6832 + name: udp3 + protocol: UDP + - port: 14250 + name: tcp2 + - port: 14268 + name: tcp3 + selector: + app-name: jaeger +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: jaeger-agent + labels: + app-name: jaeger + namespace: observe +spec: + replicas: 1 + selector: + matchLabels: + app-name: jaeger + template: + metadata: + labels: + app-name: jaeger + name: jaeger-agent + spec: + containers: + - name: jaeger-agent + image: jaegertracing/all-in-one:latest + env: + - name: COLLECTOR_ZIPKIN_HTTP_PORT + value: "9411" + - name: JAEGER_AGENT_PORT + value: "5775" + restartPolicy: Always diff --git a/sregym/observer/logstash/templates/NOTES.txt b/sregym/observer/logstash/templates/NOTES.txt new file mode 100644 index 0000000..bf35d65 --- /dev/null +++ b/sregym/observer/logstash/templates/NOTES.txt @@ -0,0 +1,2 @@ +1. Watch all cluster members come up. + $ kubectl get pods --namespace={{ .Release.Namespace }} -l app={{ template "logstash.fullname" . }} -w diff --git a/sregym/observer/logstash/templates/_helpers.tpl b/sregym/observer/logstash/templates/_helpers.tpl new file mode 100644 index 0000000..df5c594 --- /dev/null +++ b/sregym/observer/logstash/templates/_helpers.tpl @@ -0,0 +1,27 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. +*/}} +{{- define "logstash.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +*/}} +{{- define "logstash.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} + +{{/* +Use the fullname if the serviceAccount value is not set +*/}} +{{- define "logstash.serviceAccount" -}} +{{- .Values.rbac.serviceAccountName | default (include "logstash.fullname" .) 
-}} +{{- end -}} diff --git a/sregym/observer/logstash/templates/configmap-config.yaml b/sregym/observer/logstash/templates/configmap-config.yaml new file mode 100644 index 0000000..795722b --- /dev/null +++ b/sregym/observer/logstash/templates/configmap-config.yaml @@ -0,0 +1,17 @@ +{{- if .Values.logstashConfig }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "logstash.fullname" . }}-config + labels: + app: "{{ template "logstash.fullname" . }}" + chart: "{{ .Chart.Name }}" + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} +data: +{{- range $path, $config := .Values.logstashConfig }} + {{ $path }}: | +{{ tpl $config $ | indent 4 -}} +{{- end -}} +{{- end -}} diff --git a/sregym/observer/logstash/templates/configmap-pattern.yaml b/sregym/observer/logstash/templates/configmap-pattern.yaml new file mode 100644 index 0000000..05f2c71 --- /dev/null +++ b/sregym/observer/logstash/templates/configmap-pattern.yaml @@ -0,0 +1,17 @@ +{{- if .Values.logstashPattern }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "logstash.fullname" . }}-pattern + labels: + app: "{{ template "logstash.fullname" . }}" + chart: "{{ .Chart.Name }}" + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} +data: +{{- range $path, $config := .Values.logstashPattern }} + {{ $path }}: | +{{ tpl $config $ | indent 4 -}} +{{- end -}} +{{- end -}} diff --git a/sregym/observer/logstash/templates/configmap-pipeline.yaml b/sregym/observer/logstash/templates/configmap-pipeline.yaml new file mode 100644 index 0000000..72c3ece --- /dev/null +++ b/sregym/observer/logstash/templates/configmap-pipeline.yaml @@ -0,0 +1,17 @@ +{{- if .Values.logstashPipeline }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "logstash.fullname" . }}-pipeline + labels: + app: "{{ template "logstash.fullname" . }}" + chart: "{{ .Chart.Name }}" + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} +data: +{{- range $path, $config := .Values.logstashPipeline }} + {{ $path }}: | +{{ tpl $config $ | indent 4 -}} +{{- end -}} +{{- end -}} diff --git a/sregym/observer/logstash/templates/ingress.yaml b/sregym/observer/logstash/templates/ingress.yaml new file mode 100644 index 0000000..4e0bb32 --- /dev/null +++ b/sregym/observer/logstash/templates/ingress.yaml @@ -0,0 +1,68 @@ +{{- if .Values.ingress.enabled -}} +{{- $fullName := include "logstash.fullname" . -}} +{{- $httpPort := .Values.httpPort -}} +{{- $ingressPath := .Values.ingress.path -}} +{{- $pathtype := .Values.ingress.pathtype -}} +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: {{ $fullName }} + labels: + app: {{ $fullName | quote}} + chart: "{{ .Chart.Name }}" + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} +{{- with .Values.ingress.annotations }} + annotations: +{{ toYaml . | indent 4 }} +{{- end }} +spec: + {{- if .Values.ingress.className }} + ingressClassName: {{ .Values.ingress.className | quote }} + {{- end }} +{{- if .Values.ingress.tls }} + tls: + {{- if .ingressPath }} + {{- range .Values.ingress.tls }} + - hosts: + {{- range .hosts }} + - {{ . }} + {{- end }} + secretName: {{ .secretName }} + {{- end }} + {{- else }} +{{ toYaml .Values.ingress.tls | indent 4 }} + {{- end }} +{{- end}} + rules: + {{- range .Values.ingress.hosts }} + {{- /* + TODO: deprecate $ingressPath for Logstash 8.x + */}} + {{- if $ingressPath }} + - host: {{ . 
}} + http: + paths: + - path: {{ $ingressPath }} + pathType: {{ $pathtype }} + backend: + service: + name: {{ $fullName }} + port: + number: {{ $httpPort }} + {{- else }} + - host: {{ .host }} + http: + paths: + {{- range .paths }} + - path: {{ .path }} + pathType: {{ $pathtype }} + backend: + service: + name: {{ $fullName }} + port: + number: {{ .servicePort | default $httpPort }} + {{- end }} + {{- end }} + {{- end }} +{{- end }} diff --git a/sregym/observer/logstash/templates/poddisruptionbudget.yaml b/sregym/observer/logstash/templates/poddisruptionbudget.yaml new file mode 100644 index 0000000..a644215 --- /dev/null +++ b/sregym/observer/logstash/templates/poddisruptionbudget.yaml @@ -0,0 +1,20 @@ +{{- if .Values.maxUnavailable }} +{{- if .Capabilities.APIVersions.Has "policy/v1" -}} +apiVersion: policy/v1 +{{- else}} +apiVersion: policy/v1beta1 +{{- end }} +kind: PodDisruptionBudget +metadata: + name: "{{ template "logstash.fullname" . }}-pdb" + labels: + app: "{{ template "logstash.fullname" . }}" + chart: "{{ .Chart.Name }}" + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} +spec: + maxUnavailable: {{ .Values.maxUnavailable }} + selector: + matchLabels: + app: "{{ template "logstash.fullname" . }}" +{{- end }} diff --git a/sregym/observer/logstash/templates/podsecuritypolicy.yaml b/sregym/observer/logstash/templates/podsecuritypolicy.yaml new file mode 100644 index 0000000..2927756 --- /dev/null +++ b/sregym/observer/logstash/templates/podsecuritypolicy.yaml @@ -0,0 +1,14 @@ +{{- if .Values.podSecurityPolicy.create -}} +{{- $fullName := include "logstash.fullname" . -}} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ default $fullName .Values.podSecurityPolicy.name | quote }} + labels: + app: "{{ template "logstash.fullname" . }}" + chart: "{{ .Chart.Name }}" + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} +spec: +{{ toYaml .Values.podSecurityPolicy.spec | indent 2 }} +{{- end -}} diff --git a/sregym/observer/logstash/templates/role.yaml b/sregym/observer/logstash/templates/role.yaml new file mode 100644 index 0000000..9f37ffa --- /dev/null +++ b/sregym/observer/logstash/templates/role.yaml @@ -0,0 +1,25 @@ +{{- if .Values.rbac.create -}} +{{- $fullName := include "logstash.fullname" . -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ $fullName | quote }} + labels: + app: "{{ template "logstash.fullname" . }}" + chart: "{{ .Chart.Name }}" + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} +rules: + - apiGroups: + - extensions + resources: + - podsecuritypolicies + resourceNames: + {{- if eq .Values.podSecurityPolicy.name "" }} + - {{ $fullName | quote }} + {{- else }} + - {{ .Values.podSecurityPolicy.name | quote }} + {{- end }} + verbs: + - use +{{- end -}} diff --git a/sregym/observer/logstash/templates/rolebinding.yaml b/sregym/observer/logstash/templates/rolebinding.yaml new file mode 100644 index 0000000..a87d00e --- /dev/null +++ b/sregym/observer/logstash/templates/rolebinding.yaml @@ -0,0 +1,20 @@ +{{- if .Values.rbac.create -}} +{{- $fullName := include "logstash.fullname" . -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ $fullName | quote }} + labels: + app: "{{ template "logstash.fullname" . 
}}" + chart: "{{ .Chart.Name }}" + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} +subjects: + - kind: ServiceAccount + name: "{{ template "logstash.serviceAccount" . }}" + namespace: {{ .Release.Namespace | quote }} +roleRef: + kind: Role + name: {{ $fullName | quote }} + apiGroup: rbac.authorization.k8s.io +{{- end -}} diff --git a/sregym/observer/logstash/templates/secret.yaml b/sregym/observer/logstash/templates/secret.yaml new file mode 100644 index 0000000..0456e4b --- /dev/null +++ b/sregym/observer/logstash/templates/secret.yaml @@ -0,0 +1,27 @@ +{{- if .Values.secrets }} +{{- $fullName := include "logstash.fullname" . -}} +{{- range .Values.secrets }} +--- +apiVersion: v1 +kind: Secret +metadata: + name: {{ printf "%s-%s" $fullName .name | quote }} + labels: + app: {{ $fullName | quote }} + chart: {{ $.Chart.Name | quote }} + heritage: {{ $.Release.Service | quote }} + release: {{ $.Release.Name | quote }} + {{- range $key, $value := $.Values.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} +data: +{{- range $key, $val := .value }} + {{- if hasSuffix "filepath" $key }} + {{ $key | replace ".filepath" "" }}: {{ $.Files.Get $val | b64enc | quote }} + {{ else }} + {{ $key }}: {{ $val | b64enc | quote }} + {{- end }} +{{- end }} +type: Opaque +{{- end }} +{{- end }} \ No newline at end of file diff --git a/sregym/observer/logstash/templates/service-headless.yaml b/sregym/observer/logstash/templates/service-headless.yaml new file mode 100644 index 0000000..1982ef4 --- /dev/null +++ b/sregym/observer/logstash/templates/service-headless.yaml @@ -0,0 +1,20 @@ +--- +kind: Service +apiVersion: v1 +metadata: + name: "{{ template "logstash.fullname" . }}-headless" + labels: + app: "{{ template "logstash.fullname" . }}" + chart: "{{ .Chart.Name }}" + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} +{{- if .Values.labels }} +{{ toYaml .Values.labels | indent 4 }} +{{- end }} +spec: + clusterIP: None + selector: + app: "{{ template "logstash.fullname" . }}" + ports: + - name: http + port: {{ .Values.httpPort }} diff --git a/sregym/observer/logstash/templates/service.yaml b/sregym/observer/logstash/templates/service.yaml new file mode 100644 index 0000000..b732a0c --- /dev/null +++ b/sregym/observer/logstash/templates/service.yaml @@ -0,0 +1,32 @@ +{{- if .Values.service }} +--- +kind: Service +apiVersion: v1 +metadata: + name: "{{ template "logstash.fullname" . }}" + labels: + app: "{{ template "logstash.fullname" . }}" + chart: "{{ .Chart.Name }}" + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} + annotations: +{{ toYaml .Values.service.annotations | indent 4 }} +spec: + type: {{ .Values.service.type }} +{{- if .Values.service.loadBalancerIP }} + loadBalancerIP: {{ .Values.service.loadBalancerIP }} +{{- end }} +{{- with .Values.service.loadBalancerSourceRanges }} + loadBalancerSourceRanges: +{{ toYaml . | indent 4 }} +{{- end }} +{{- if .Values.service.externalTrafficPolicy }} + externalTrafficPolicy: {{ .Values.service.externalTrafficPolicy }} +{{- end }} + selector: + app: "{{ template "logstash.fullname" . 
}}" + chart: "{{ .Chart.Name }}" + release: {{ .Release.Name | quote }} + ports: +{{ toYaml .Values.service.ports | indent 4 }} +{{- end }} diff --git a/sregym/observer/logstash/templates/serviceaccount.yaml b/sregym/observer/logstash/templates/serviceaccount.yaml new file mode 100644 index 0000000..18e56e4 --- /dev/null +++ b/sregym/observer/logstash/templates/serviceaccount.yaml @@ -0,0 +1,22 @@ +{{- if .Values.rbac.create -}} +{{- $fullName := include "logstash.fullname" . -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: "{{ template "logstash.serviceAccount" . }}" + annotations: + {{- with .Values.rbac.serviceAccountAnnotations }} + {{- toYaml . | nindent 4 }} + {{- end }} + labels: + app: "{{ template "logstash.fullname" . }}" + chart: "{{ .Chart.Name }}" + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} + {{- if .Values.rbac.annotations }} + annotations: + {{- range $key, $value := .Values.rbac.annotations }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- end }} +{{- end -}} diff --git a/sregym/observer/logstash/templates/statefulset.yaml b/sregym/observer/logstash/templates/statefulset.yaml new file mode 100644 index 0000000..c721979 --- /dev/null +++ b/sregym/observer/logstash/templates/statefulset.yaml @@ -0,0 +1,237 @@ +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: {{ template "logstash.fullname" . }} + labels: + app: "{{ template "logstash.fullname" . }}" + chart: "{{ .Chart.Name }}" + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} + {{- range $key, $value := .Values.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} +spec: + serviceName: {{ template "logstash.fullname" . }}-headless + selector: + matchLabels: + app: "{{ template "logstash.fullname" . }}" + release: {{ .Release.Name | quote }} + replicas: {{ .Values.replicas }} + podManagementPolicy: {{ .Values.podManagementPolicy }} + updateStrategy: + type: {{ .Values.updateStrategy }} + {{- if .Values.persistence.enabled }} + volumeClaimTemplates: + - metadata: + name: {{ template "logstash.fullname" . }} + {{- with .Values.persistence.annotations }} + annotations: +{{ toYaml . | indent 8 }} + {{- end }} + spec: +{{ toYaml .Values.volumeClaimTemplate | indent 6 }} + {{- end }} + template: + metadata: + name: "{{ template "logstash.fullname" . }}" + labels: + app: "{{ template "logstash.fullname" . }}" + chart: "{{ .Chart.Name }}" + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} + {{- range $key, $value := .Values.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} + annotations: + {{- range $key, $value := .Values.podAnnotations }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- /* This forces a restart if the configmap has changed */}} + {{- if .Values.logstashConfig }} + configchecksum: {{ include (print .Template.BasePath "/configmap-config.yaml") . | sha256sum | trunc 63 }} + {{- end }} + {{- /* This forces a restart if the configmap has changed */}} + {{- if .Values.logstashPipeline }} + pipelinechecksum: {{ include (print .Template.BasePath "/configmap-pipeline.yaml") . | sha256sum | trunc 63 }} + {{- end }} + {{- if .Values.logstashPattern }} + patternchecksum: {{ include (print .Template.BasePath "/configmap-pattern.yaml") . | sha256sum | trunc 63 }} + {{- end }} + {{- if .Values.secrets }} + secretschecksum: {{ include (print .Template.BasePath "/secret.yaml") . 
| sha256sum | trunc 63 }} + {{- end }} + spec: + {{- if .Values.schedulerName }} + schedulerName: "{{ .Values.schedulerName }}" + {{- end }} + securityContext: +{{ toYaml .Values.podSecurityContext | indent 8 }} + {{- if or .Values.rbac.create .Values.rbac.serviceAccountName }} + serviceAccountName: "{{ template "logstash.serviceAccount" . }}" + {{- end }} + {{- with .Values.tolerations }} + tolerations: +{{ toYaml . | indent 6 }} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: +{{ toYaml . | indent 8 }} + {{- end }} + {{- if or (eq .Values.antiAffinity "hard") (eq .Values.antiAffinity "soft") .Values.nodeAffinity .Values.podAffinity }} + {{- if .Values.priorityClassName }} + priorityClassName: {{ .Values.priorityClassName }} + {{- end }} + affinity: + {{- end }} + {{- if eq .Values.antiAffinity "hard" }} + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app + operator: In + values: + - "{{ template "logstash.fullname" .}}" + topologyKey: {{ .Values.antiAffinityTopologyKey }} + {{- else if eq .Values.antiAffinity "soft" }} + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 1 + podAffinityTerm: + topologyKey: {{ .Values.antiAffinityTopologyKey }} + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - "{{ template "logstash.fullname" . }}" + {{- end }} + {{- with .Values.nodeAffinity }} + nodeAffinity: +{{ toYaml . | indent 10 }} + {{- end }} + {{- with .Values.podAffinity }} + podAffinity: +{{ toYaml . | indent 10 }} + {{- end }} + terminationGracePeriodSeconds: {{ .Values.terminationGracePeriod }} + volumes: + {{- range .Values.secretMounts }} + - name: {{ .name }} + secret: + secretName: {{ .secretName }} + {{- end }} + {{- if .Values.logstashConfig }} + - name: logstashconfig + configMap: + name: {{ template "logstash.fullname" . }}-config + {{- end }} + {{- if .Values.logstashPipeline }} + - name: logstashpipeline + configMap: + name: {{ template "logstash.fullname" . }}-pipeline + {{- end }} + {{- if .Values.logstashPattern }} + - name: logstashpattern + configMap: + name: {{ template "logstash.fullname" . }}-pattern + {{- end }} + {{- if .Values.extraVolumes }} + {{- if eq "string" (printf "%T" .Values.extraVolumes) }} +{{ tpl .Values.extraVolumes . | indent 8 }} + {{- else }} +{{ toYaml .Values.extraVolumes | indent 8 }} + {{- end }} + {{- end }} + {{- if .Values.imagePullSecrets }} + imagePullSecrets: +{{ toYaml .Values.imagePullSecrets | indent 8 }} + {{- end}} + {{- if .Values.hostAliases }} + hostAliases: {{ toYaml .Values.hostAliases | nindent 6 }} + {{- end }} + {{- if .Values.extraInitContainers }} + initContainers: + {{- if eq "string" (printf "%T" .Values.extraInitContainers) }} +{{ tpl .Values.extraInitContainers . | indent 6 }} + {{- else }} +{{ toYaml .Values.extraInitContainers | indent 6 }} + {{- end }} + {{- end }} + containers: + - name: "{{ template "logstash.name" . 
}}" + securityContext: +{{ toYaml .Values.securityContext | indent 10 }} + image: "{{ .Values.image }}:{{ .Values.imageTag }}" + imagePullPolicy: "{{ .Values.imagePullPolicy }}" + livenessProbe: +{{ toYaml .Values.livenessProbe | indent 10 }} + readinessProbe: +{{ toYaml .Values.readinessProbe | indent 10 }} + ports: + - name: http + containerPort: {{ .Values.httpPort }} + {{- if .Values.extraPorts }} + {{- toYaml .Values.extraPorts | nindent 8 }} + {{- end }} + resources: +{{ toYaml .Values.resources | indent 10 }} + env: + - name: LS_JAVA_OPTS + value: "{{ .Values.logstashJavaOpts }}" +{{- if .Values.extraEnvs }} +{{ toYaml .Values.extraEnvs | indent 10 }} +{{- end }} +{{- if .Values.envFrom }} + envFrom: +{{ toYaml .Values.envFrom | indent 10 }} +{{- end }} + volumeMounts: + {{- if .Values.persistence.enabled }} + - name: "{{ template "logstash.fullname" . }}" + mountPath: /usr/share/logstash/data + {{- end }} + {{- range .Values.secretMounts }} + - name: {{ .name }} + mountPath: {{ .path }} + {{- if .subPath }} + subPath: {{ .subPath }} + {{- end }} + {{- end }} + {{- range $path, $config := .Values.logstashConfig }} + - name: logstashconfig + mountPath: /usr/share/logstash/config/{{ $path }} + subPath: {{ $path }} + {{- end -}} + {{- range $path, $config := .Values.logstashPipeline }} + - name: logstashpipeline + mountPath: /usr/share/logstash/pipeline/{{ $path }} + subPath: {{ $path }} + {{- end -}} + {{- if .Values.logstashPattern }} + {{- $logstashPatternDir := .Values.logstashPatternDir -}} + {{- range $path, $config := .Values.logstashPattern }} + - name: logstashpattern + mountPath: {{ $logstashPatternDir }}{{ $path }} + subPath: {{ $path }} + {{- end -}} + {{- end -}} + {{- if .Values.extraVolumeMounts }} + {{- if eq "string" (printf "%T" .Values.extraVolumeMounts) }} +{{ tpl .Values.extraVolumeMounts . | indent 10 }} + {{- else }} +{{ toYaml .Values.extraVolumeMounts | indent 10 }} + {{- end }} + {{- end }} +{{- if .Values.lifecycle }} + lifecycle: +{{ toYaml .Values.lifecycle | indent 10 }} +{{- end }} + {{- if .Values.extraContainers }} + {{- if eq "string" (printf "%T" .Values.extraContainers) }} +{{ tpl .Values.extraContainers . | indent 6 }} + {{- else }} +{{ toYaml .Values.extraContainers | indent 6 }} + {{- end }} + {{- end }} diff --git a/sregym/observer/logstash/values.yaml b/sregym/observer/logstash/values.yaml new file mode 100644 index 0000000..d42a01b --- /dev/null +++ b/sregym/observer/logstash/values.yaml @@ -0,0 +1,311 @@ +--- +replicas: 1 + +# Allows you to add any config files in /usr/share/logstash/config/ +# such as logstash.yml and log4j2.properties +# +# Note that when overriding logstash.yml, `http.host: 0.0.0.0` should always be included +# to make default probes work. +logstashConfig: {} +# logstash.yml: | +# key: +# nestedkey: value +# log4j2.properties: | +# key = value + +# Allows you to add any pipeline files in /usr/share/logstash/pipeline/ +### ***warn*** there is a hardcoded logstash.conf in the image, override it first +logstashPipeline: {} +# logstash.conf: | +# input { +# exec { +# command => "uptime" +# interval => 30 +# } +# } +# output { stdout { } } + +# Allows you to add any pattern files in your custom pattern dir +logstashPatternDir: "/usr/share/logstash/patterns/" +logstashPattern: {} +# pattern.conf: | +# DPKG_VERSION [-+~<>\.0-9a-zA-Z]+ + +# Extra environment variables to append to this nodeGroup +# This will be appended to the current 'env:' key. 
You can use any of the kubernetes env +# syntax here +extraEnvs: [] +# - name: MY_ENVIRONMENT_VAR +# value: the_value_goes_here + +# Allows you to load environment variables from kubernetes secret or config map +envFrom: [] +# - secretRef: +# name: env-secret +# - configMapRef: +# name: config-map + +# Add sensitive data to k8s secrets +secrets: [] +# - name: "env" +# value: +# ELASTICSEARCH_PASSWORD: "LS1CRUdJTiBgUFJJVkFURSB" +# api_key: ui2CsdUadTiBasRJRkl9tvNnw +# - name: "tls" +# value: +# ca.crt: | +# LS0tLS1CRUdJT0K +# LS0tLS1CRUdJT0K +# LS0tLS1CRUdJT0K +# LS0tLS1CRUdJT0K +# cert.crt: "LS0tLS1CRUdJTiBlRJRklDQVRFLS0tLS0K" +# cert.key.filepath: "secrets.crt" # The path to file should be relative to the `values.yaml` file. + +# A list of secrets and their paths to mount inside the pod +secretMounts: [] + +hostAliases: [] +#- ip: "127.0.0.1" +# hostnames: +# - "foo.local" +# - "bar.local" + +image: "docker.elastic.co/logstash/logstash" +imageTag: "8.7.1" +imagePullPolicy: "IfNotPresent" +imagePullSecrets: [] + +podAnnotations: {} + +# additionals labels +labels: {} + +logstashJavaOpts: "-Xmx1g -Xms1g" + +resources: + requests: + cpu: "100m" + memory: "1536Mi" + limits: + cpu: "1000m" + memory: "1536Mi" + +volumeClaimTemplate: + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 1Gi + +rbac: + create: false + serviceAccountAnnotations: {} + serviceAccountName: "" + annotations: + {} + #annotation1: "value1" + #annotation2: "value2" + #annotation3: "value3" + +podSecurityPolicy: + create: false + name: "" + spec: + privileged: false + fsGroup: + rule: RunAsAny + runAsUser: + rule: RunAsAny + seLinux: + rule: RunAsAny + supplementalGroups: + rule: RunAsAny + volumes: + - secret + - configMap + - persistentVolumeClaim + +persistence: + enabled: false + annotations: {} + +extraVolumes: + [] + # - name: extras + # emptyDir: {} + +extraVolumeMounts: + [] + # - name: extras + # mountPath: /usr/share/extras + # readOnly: true + +extraContainers: + [] + # - name: do-something + # image: busybox + # command: ['do', 'something'] + +extraInitContainers: + [] + # - name: do-something + # image: busybox + # command: ['do', 'something'] + +# This is the PriorityClass settings as defined in +# https://kubernetes.io/docs/concepts/configuration/pod-priority-preemption/#priorityclass +priorityClassName: "" + +# By default this will make sure two pods don't end up on the same node +# Changing this to a region would allow you to spread pods across regions +antiAffinityTopologyKey: "kubernetes.io/hostname" + +# Hard means that by default pods will only be scheduled if there are enough nodes for them +# and that they will never end up on the same node. Setting this to soft will do this "best effort" +antiAffinity: "hard" + +# This is the node affinity settings as defined in +# https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#node-affinity +nodeAffinity: {} + +# This is inter-pod affinity settings as defined in +# https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#inter-pod-affinity-and-anti-affinity +podAffinity: {} + +# The default is to deploy all pods serially. 
By setting this to parallel all pods are started at +# the same time when bootstrapping the cluster +podManagementPolicy: "Parallel" + +httpPort: 9600 + +# Custom ports to add to logstash +extraPorts: + [] + # - name: beats + # containerPort: 5001 + +updateStrategy: RollingUpdate + +# This is the max unavailable setting for the pod disruption budget +# The default value of 1 will make sure that kubernetes won't allow more than 1 +# of your pods to be unavailable during maintenance +maxUnavailable: 1 + +podSecurityContext: + fsGroup: 1000 + runAsUser: 1000 + +securityContext: + capabilities: + drop: + - ALL + # readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + +# How long to wait for logstash to stop gracefully +terminationGracePeriod: 120 + +# Probes +# Default probes are using `httpGet` which requires that `http.host: 0.0.0.0` is part of +# `logstash.yml`. If needed probes can be disabled or overridden using the following syntaxes: +# +# disable livenessProbe +# livenessProbe: null +# +# replace httpGet default readinessProbe by some exec probe +# readinessProbe: +# httpGet: null +# exec: +# command: +# - curl +# - localhost:9600 + +livenessProbe: + httpGet: + path: / + port: http + initialDelaySeconds: 300 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 + successThreshold: 1 + +readinessProbe: + httpGet: + path: / + port: http + initialDelaySeconds: 60 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 + successThreshold: 3 + +## Use an alternate scheduler. +## ref: https://kubernetes.io/docs/tasks/administer-cluster/configure-multiple-schedulers/ +## +schedulerName: "" + +nodeSelector: {} +tolerations: [] + +nameOverride: "" +fullnameOverride: "" + +lifecycle: + {} + # preStop: + # exec: + # command: ["/bin/sh", "-c", "echo Hello from the postStart handler > /usr/share/message"] + # postStart: + # exec: + # command: ["/bin/sh", "-c", "echo Hello from the postStart handler > /usr/share/message"] + +service: + {} + # annotations: {} + # type: ClusterIP + # loadBalancerIP: "" + # ports: + # - name: beats + # port: 5044 + # protocol: TCP + # targetPort: 5044 + # - name: http + # port: 8080 + # protocol: TCP + # targetPort: 8080 + +ingress: + enabled: false + annotations: + {} + # kubernetes.io/tls-acme: "true" + className: "nginx" + pathtype: ImplementationSpecific + hosts: + - host: logstash-example.local + paths: + - path: /beats + servicePort: 5044 + - path: /http + servicePort: 8080 + tls: [] + # - secretName: logstash-example-tls + # hosts: + # - logstash-example.local + + # template: + # settings: + # index: + # lifecycle: + # name: "log_test1" + # rollover_alias: "logstash_suited" + # number_of_shards: 3 + # number_of_replicas: 1 + + output: + elasticsearch: + hosts: ["https://10.10.1.202:9200"] + index: "logstash-%{+YYYY.MM.dd}" + template_name: "logstash-template" + template_overwride: false \ No newline at end of file diff --git a/sregym/observer/prometheus/prometheus-pvc.yml b/sregym/observer/prometheus/prometheus-pvc.yml new file mode 100644 index 0000000..aa8afd3 --- /dev/null +++ b/sregym/observer/prometheus/prometheus-pvc.yml @@ -0,0 +1,11 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: prometheus-pvc +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10Gi + storageClassName: openebs-hostpath \ No newline at end of file diff --git a/sregym/observer/prometheus/prometheus/.helmignore b/sregym/observer/prometheus/prometheus/.helmignore new file mode 100644 index 0000000..0c4eb83 --- 
/dev/null +++ b/sregym/observer/prometheus/prometheus/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj + +OWNERS diff --git a/sregym/observer/prometheus/prometheus/Chart.yaml b/sregym/observer/prometheus/prometheus/Chart.yaml new file mode 100644 index 0000000..1094661 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/Chart.yaml @@ -0,0 +1,53 @@ +apiVersion: v2 +name: prometheus +appVersion: v2.47.2 +version: 25.6.0 +kubeVersion: ">=1.19.0-0" +description: Prometheus is a monitoring system and time series database. +home: https://prometheus.io/ +icon: https://raw.githubusercontent.com/prometheus/prometheus.github.io/master/assets/prometheus_logo-cb55bb5c346.png +sources: + - https://github.com/prometheus/alertmanager + - https://github.com/prometheus/prometheus + - https://github.com/prometheus/pushgateway + - https://github.com/prometheus/node_exporter + - https://github.com/kubernetes/kube-state-metrics +maintainers: + - name: gianrubio + email: gianrubio@gmail.com + - name: zanhsieh + email: zanhsieh@gmail.com + - name: Xtigyro + email: miroslav.hadzhiev@gmail.com + - name: naseemkullah + email: naseem@transit.app + - name: zeritti + email: rootsandtrees@posteo.de +type: application +dependencies: + - name: alertmanager + version: "1.7.*" + repository: https://prometheus-community.github.io/helm-charts + condition: alertmanager.enabled + - name: kube-state-metrics + version: "5.15.*" + repository: https://prometheus-community.github.io/helm-charts + condition: kube-state-metrics.enabled + - name: prometheus-node-exporter + version: "4.23.*" + repository: https://prometheus-community.github.io/helm-charts + condition: prometheus-node-exporter.enabled + - name: prometheus-pushgateway + version: "2.4.*" + repository: https://prometheus-community.github.io/helm-charts + condition: prometheus-pushgateway.enabled +keywords: + - monitoring + - prometheus +annotations: + "artifacthub.io/license": Apache-2.0 + "artifacthub.io/links": | + - name: Chart Source + url: https://github.com/prometheus-community/helm-charts + - name: Upstream Project + url: https://github.com/prometheus/prometheus diff --git a/sregym/observer/prometheus/prometheus/README.md b/sregym/observer/prometheus/prometheus/README.md new file mode 100644 index 0000000..6bfe35b --- /dev/null +++ b/sregym/observer/prometheus/prometheus/README.md @@ -0,0 +1,382 @@ +# Prometheus + +[Prometheus](https://prometheus.io/), a [Cloud Native Computing Foundation](https://cncf.io/) project, is a systems and service monitoring system. It collects metrics from configured targets at given intervals, evaluates rule expressions, displays the results, and can trigger alerts if some condition is observed to be true. + +This chart bootstraps a [Prometheus](https://prometheus.io/) deployment on a [Kubernetes](http://kubernetes.io) cluster using the [Helm](https://helm.sh) package manager. 
+ +## Prerequisites + +- Kubernetes 1.19+ +- Helm 3.7+ + +## Get Repository Info + +```console +helm repo add prometheus-community https://prometheus-community.github.io/helm-charts +helm repo update +``` + +_See [helm repository](https://helm.sh/docs/helm/helm_repo/) for command documentation._ + +## Install Chart + +Starting with version 16.0, the Prometheus chart requires Helm 3.7+ in order to install successfully. Please check your `helm` release before installation. + +```console +helm install [RELEASE_NAME] prometheus-community/prometheus +``` + +_See [configuration](#configuration) below._ + +_See [helm install](https://helm.sh/docs/helm/helm_install/) for command documentation._ + +## Dependencies + +By default this chart installs additional, dependent charts: + +- [alertmanager](https://github.com/prometheus-community/helm-charts/tree/main/charts/alertmanager) +- [kube-state-metrics](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-state-metrics) +- [prometheus-node-exporter](https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-node-exporter) +- [prometheus-pushgateway](https://github.com/walker-tom/helm-charts/tree/main/charts/prometheus-pushgateway) + +To disable the dependency during installation, set `alertmanager.enabled`, `kube-state-metrics.enabled`, `prometheus-node-exporter.enabled` and `prometheus-pushgateway.enabled` to `false`. + +_See [helm dependency](https://helm.sh/docs/helm/helm_dependency/) for command documentation._ + +## Uninstall Chart + +```console +helm uninstall [RELEASE_NAME] +``` + +This removes all the Kubernetes components associated with the chart and deletes the release. + +_See [helm uninstall](https://helm.sh/docs/helm/helm_uninstall/) for command documentation._ + +## Updating values.schema.json + +A [`values.schema.json`](https://helm.sh/docs/topics/charts/#schema-files) file has been added to validate chart values. When `values.yaml` file has a structure change (i.e. add a new field, change value type, etc.), modify `values.schema.json` file manually or run `helm schema-gen values.yaml > values.schema.json` to ensure the schema is aligned with the latest values. Refer to [helm plugin `helm-schema-gen`](https://github.com/karuppiah7890/helm-schema-gen) for plugin installation instructions. + +## Upgrading Chart + +```console +helm upgrade [RELEASE_NAME] prometheus-community/prometheus --install +``` + +_See [helm upgrade](https://helm.sh/docs/helm/helm_upgrade/) for command documentation._ + +### To 25.0 + +The `server.remoteRead[].url` and `server.remoteWrite[].url` fields now support templating. Allowing for `url` values such as `https://{{ .Release.Name }}.example.com`. + +Any entries in these which previously included `{{` or `}}` must be escaped with `{{ "{{" }}` and `{{ "}}" }}` respectively. Entries which did not previously include the template-like syntax will not be affected. + +### To 24.0 + +Require Kubernetes 1.19+ + +Release 1.0.0 of the _alertmanager_ replaced [configmap-reload](https://github.com/jimmidyson/configmap-reload) with [prometheus-config-reloader](https://github.com/prometheus-operator/prometheus-operator/tree/main/cmd/prometheus-config-reloader). +Extra command-line arguments specified via `configmapReload.prometheus.extraArgs` are not compatible and will break with the new prometheus-config-reloader. 
Please, refer to the [sources](https://github.com/prometheus-operator/prometheus-operator/blob/main/cmd/prometheus-config-reloader/main.go) in order to make the appropriate adjustment to the extra command-line arguments. + +### To 23.0 + +Release 5.0.0 of the _kube-state-metrics_ chart introduced a separation of the `image.repository` value in two distinct values: + +```console + image: + registry: registry.k8s.io + repository: kube-state-metrics/kube-state-metrics +``` + +If a custom values file or CLI flags set `kube-state-metrics.image.repository`, please, set the new values accordingly. + +If you are upgrading _prometheus-pushgateway_ with the chart and _prometheus-pushgateway_ has been deployed as a statefulset with a persistent volume, the statefulset must be deleted before upgrading the chart, e.g.: + +```bash +kubectl delete sts -l app.kubernetes.io/name=prometheus-pushgateway -n monitoring --cascade=orphan +``` + +Users are advised to review changes in the corresponding chart releases before upgrading. + +### To 22.0 + +The `app.kubernetes.io/version` label has been removed from the pod selector. + +Therefore, you must delete the previous StatefulSet or Deployment before upgrading. Performing this operation will cause **Prometheus to stop functioning** until the upgrade is complete. + +```console +kubectl delete deploy,sts -l app.kubernetes.io/name=prometheus +``` + +### To 21.0 + +The Kubernetes labels have been updated to follow [Helm 3 label and annotation best practices](https://helm.sh/docs/chart_best_practices/labels/). +Specifically, the label mapping is listed below: + +| OLD | NEW | +|--------------------|------------------------------| +|heritage | app.kubernetes.io/managed-by | +|chart | helm.sh/chart | +|[container version] | app.kubernetes.io/version | +|app | app.kubernetes.io/name | +|release | app.kubernetes.io/instance | + +Therefore, depending on the way you've configured the chart, the previous StatefulSet or Deployment needs to be deleted before upgrading. + +If `runAsStatefulSet: false` (this is the default): + +```console +kubectl delete deploy -l app=prometheus +``` + +If `runAsStatefulSet: true`: + +```console +kubectl delete sts -l app=prometheus +``` + +After that, do the actual upgrade: + +```console +helm upgrade -i prometheus prometheus-community/prometheus +``` + +### To 20.0 + +The [configmap-reload](https://github.com/jimmidyson/configmap-reload) container was replaced by the [prometheus-config-reloader](https://github.com/prometheus-operator/prometheus-operator/tree/main/cmd/prometheus-config-reloader). +Extra command-line arguments specified via configmapReload.prometheus.extraArgs are not compatible and will break with the new prometheus-config-reloader; refer to the [sources](https://github.com/prometheus-operator/prometheus-operator/blob/main/cmd/prometheus-config-reloader/main.go) in order to make the appropriate adjustment to the extra command-line arguments. + +### To 19.0 + +Prometheus has been updated to version v2.40.5. + +Prometheus-pushgateway was updated to version 2.0.0, which adopted [Helm label and annotation best practices](https://helm.sh/docs/chart_best_practices/labels/). +See the [upgrade docs of the prometheus-pushgateway chart](https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-pushgateway#to-200) to see what to do before you upgrade Prometheus.
+ +The condition in Chart.yaml to disable kube-state-metrics has been changed from `kubeStateMetrics.enabled` to `kube-state-metrics.enabled`. + +The Docker image tag is taken from the appVersion field in Chart.yaml by default. + +Unused subchart configs have been removed and the subchart config is now at the bottom of the config file. + +If Prometheus is used as a Deployment, the update strategy has been changed to "Recreate" by default, so Helm updates work out of the box. + +`.Values.server.extraTemplates` & `.Values.server.extraObjects` have been removed in favour of `.Values.extraManifests`, which can do the same. + +`.Values.server.enabled` has been removed as it is no longer needed now that all components are created by subcharts. + +All files in the `templates/server` directory have been moved to the `templates` directory. + +```bash +helm upgrade [RELEASE_NAME] prometheus-community/prometheus --version 19.0.0 +``` + +### To 18.0 + +Version 18.0.0 uses the alertmanager service from the [alertmanager chart](https://github.com/prometheus-community/helm-charts/tree/main/charts/alertmanager). If you've made some config changes, please check the old `alertmanager` and the new `alertmanager` configuration section in values.yaml for differences. + +Note that the `configmapReload` section for `alertmanager` was moved out of its dedicated section (`configmapReload.alertmanager`) and is now embedded under alertmanager (`alertmanager.configmapReload`). + +Before you update, please scale down the `prometheus-server` deployment to `0`, then perform the upgrade: + +```bash +# In 17.x +kubectl scale deploy prometheus-server --replicas=0 +# Upgrade +helm upgrade [RELEASE_NAME] prometheus-community/prometheus --version 18.0.0 +``` + +### To 17.0 + +Version 17.0.0 uses the pushgateway service from the [prometheus-pushgateway chart](https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-pushgateway). If you've made some config changes, please check the old `pushgateway` and the new `prometheus-pushgateway` configuration section in values.yaml for differences. + +Before you update, please scale down the `prometheus-server` deployment to `0`, then perform the upgrade: + +```bash +# In 16.x +kubectl scale deploy prometheus-server --replicas=0 +# Upgrade +helm upgrade [RELEASE_NAME] prometheus-community/prometheus --version 17.0.0 +``` + +### To 16.0 + +Starting from version 16.0, embedded services (like alertmanager, node-exporter etc.) are moved out of the Prometheus chart and the respective charts from this repository are used as dependencies. Version 16.0.0 moves the node-exporter service to the [prometheus-node-exporter chart](https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-node-exporter). If you've made some config changes, please check the old `nodeExporter` and the new `prometheus-node-exporter` configuration section in values.yaml for differences. + +Before you update, please scale down the `prometheus-server` deployment to `0`, then perform the upgrade: + +```bash +# In 15.x +kubectl scale deploy prometheus-server --replicas=0 +# Upgrade +helm upgrade [RELEASE_NAME] prometheus-community/prometheus --version 16.0.0 +``` + +### To 15.0 + +Version 15.0.0 changes the relabeling config, aligning it with the [Prometheus community conventions](https://github.com/prometheus/prometheus/pull/9832). If you've made manual changes to the relabeling config, you have to adapt your changes.
+ +Before you update please execute the following command, to be able to update kube-state-metrics: + +```bash +kubectl delete deployments.apps -l app.kubernetes.io/instance=prometheus,app.kubernetes.io/name=kube-state-metrics --cascade=orphan +``` + +### To 9.0 + +Version 9.0 adds a new option to enable or disable the Prometheus Server. This supports the use case of running a Prometheus server in one k8s cluster and scraping exporters in another cluster while using the same chart for each deployment. To install the server `server.enabled` must be set to `true`. + +### To 5.0 + +As of version 5.0, this chart uses Prometheus 2.x. This version of prometheus introduces a new data format and is not compatible with prometheus 1.x. It is recommended to install this as a new release, as updating existing releases will not work. See the [prometheus docs](https://prometheus.io/docs/prometheus/latest/migration/#storage) for instructions on retaining your old data. + +Prometheus version 2.x has made changes to alertmanager, storage and recording rules. Check out the migration guide [here](https://prometheus.io/docs/prometheus/2.0/migration/). + +Users of this chart will need to update their alerting rules to the new format before they can upgrade. + +### Example Migration + +Assuming you have an existing release of the prometheus chart, named `prometheus-old`. In order to update to prometheus 2.x while keeping your old data do the following: + +1. Update the `prometheus-old` release. Disable scraping on every component besides the prometheus server, similar to the configuration below: + + ```yaml + alertmanager: + enabled: false + alertmanagerFiles: + alertmanager.yml: "" + kubeStateMetrics: + enabled: false + nodeExporter: + enabled: false + pushgateway: + enabled: false + server: + extraArgs: + storage.local.retention: 720h + serverFiles: + alerts: "" + prometheus.yml: "" + rules: "" + ``` + +1. Deploy a new release of the chart with version 5.0+ using prometheus 2.x. In the values.yaml set the scrape config as usual, and also add the `prometheus-old` instance as a remote-read target. + + ```yaml + prometheus.yml: + ... + remote_read: + - url: http://prometheus-old/api/v1/read + ... + ``` + + Old data will be available when you query the new prometheus instance. + +## Configuration + +See [Customizing the Chart Before Installing](https://helm.sh/docs/intro/using_helm/#customizing-the-chart-before-installing). To see all configurable options with detailed comments, visit the chart's [values.yaml](./values.yaml), or run these configuration commands: + +```console +helm show values prometheus-community/prometheus +``` + +You may similarly use the above configuration commands on each chart [dependency](#dependencies) to see its configurations. + +### Scraping Pod Metrics via Annotations + +This chart uses a default configuration that causes prometheus to scrape a variety of kubernetes resource types, provided they have the correct annotations. In this section we describe how to configure pods to be scraped; for information on how other resource types can be scraped you can do a `helm template` to get the kubernetes resource definitions, and then reference the prometheus configuration in the ConfigMap against the prometheus documentation for [relabel_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config) and [kubernetes_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#kubernetes_sd_config). 
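+Concretely, the annotation-driven pod scraping described in this section is implemented by pairing a `kubernetes_sd_configs` pod-discovery job with relabel rules keyed off the `prometheus.io/*` annotations shown next. The following is an abridged, hand-written sketch of that pattern, not the chart's verbatim default configuration:
+
+```yaml
+scrape_configs:
+  - job_name: kubernetes-pods          # illustrative job name
+    kubernetes_sd_configs:
+      - role: pod
+    relabel_configs:
+      # keep only pods that opt in via the prometheus.io/scrape annotation
+      - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
+        action: keep
+        regex: "true"
+      # honour a custom metrics path set via prometheus.io/path
+      - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
+        action: replace
+        target_label: __metrics_path__
+        regex: (.+)
+      # rewrite the scrape address to the port given in prometheus.io/port
+      - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
+        action: replace
+        regex: ([^:]+)(?::\d+)?;(\d+)
+        replacement: $1:$2
+        target_label: __address__
+```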
+ +In order to get prometheus to scrape pods, you must add annotations to the pods as below: + +```yaml +metadata: + annotations: + prometheus.io/scrape: "true" + prometheus.io/path: /metrics + prometheus.io/port: "8080" +``` + +You should adjust `prometheus.io/path` based on the URL that your pod serves metrics from. `prometheus.io/port` should be set to the port that your pod serves metrics from. Note that the values for `prometheus.io/scrape` and `prometheus.io/port` must be enclosed in double quotes. + +### Sharing Alerts Between Services + +Note that when [installing](#install-chart) or [upgrading](#upgrading-chart) you may use multiple values override files. This is particularly useful when you have alerts belonging to multiple services in the cluster. For example, + +```yaml +# values.yaml +# ... + +# service1-alert.yaml +serverFiles: + alerts: + service1: + - alert: anAlert + # ... + +# service2-alert.yaml +serverFiles: + alerts: + service2: + - alert: anAlert + # ... +``` + +```console +helm install [RELEASE_NAME] prometheus-community/prometheus -f values.yaml -f service1-alert.yaml -f service2-alert.yaml +``` + +### RBAC Configuration + +Roles and RoleBindings resources will be created automatically for `server` service. + +To manually setup RBAC you need to set the parameter `rbac.create=false` and specify the service account to be used for each service by setting the parameters: `serviceAccounts.{{ component }}.create` to `false` and `serviceAccounts.{{ component }}.name` to the name of a pre-existing service account. + +> **Tip**: You can refer to the default `*-clusterrole.yaml` and `*-clusterrolebinding.yaml` files in [templates](templates/) to customize your own. + +### ConfigMap Files + +AlertManager is configured through [alertmanager.yml](https://prometheus.io/docs/alerting/configuration/). This file (and any others listed in `alertmanagerFiles`) will be mounted into the `alertmanager` pod. + +Prometheus is configured through [prometheus.yml](https://prometheus.io/docs/operating/configuration/). This file (and any others listed in `serverFiles`) will be mounted into the `server` pod. + +### Ingress TLS + +If your cluster allows automatic creation/retrieval of TLS certificates (e.g. [cert-manager](https://github.com/jetstack/cert-manager)), please refer to the documentation for that mechanism. + +To manually configure TLS, first create/retrieve a key & certificate pair for the address(es) you wish to protect. Then create a TLS secret in the namespace: + +```console +kubectl create secret tls prometheus-server-tls --cert=path/to/tls.cert --key=path/to/tls.key +``` + +Include the secret's name, along with the desired hostnames, in the alertmanager/server Ingress TLS section of your custom `values.yaml` file: + +```yaml +server: + ingress: + ## If true, Prometheus server Ingress will be created + ## + enabled: true + + ## Prometheus server Ingress hostnames + ## Must be provided if Ingress is enabled + ## + hosts: + - prometheus.domain.com + + ## Prometheus server Ingress TLS configuration + ## Secrets must be manually created in the namespace + ## + tls: + - secretName: prometheus-server-tls + hosts: + - prometheus.domain.com +``` + +### NetworkPolicy + +Enabling Network Policy for Prometheus will secure connections to Alert Manager and Kube State Metrics by only accepting connections from Prometheus Server. All inbound connections to Prometheus Server are still allowed. 
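+For a sense of what this amounts to, the policies are conceptually equivalent to an ingress rule that only admits traffic from the Prometheus server pods. A hand-written sketch follows; the name, label selectors, and port are illustrative and would need to be adapted to the actual release rather than taken as what the chart renders:
+
+```yaml
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+  name: allow-from-prometheus-server   # illustrative name
+spec:
+  podSelector:
+    matchLabels:
+      app.kubernetes.io/name: kube-state-metrics   # the protected scrape target
+  policyTypes:
+    - Ingress
+  ingress:
+    - from:
+        - podSelector:
+            matchLabels:
+              app.kubernetes.io/name: prometheus
+              app.kubernetes.io/component: server
+      ports:
+        - protocol: TCP
+          port: 8080   # illustrative metrics port
+```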
+ +To enable network policy for Prometheus, install a networking plugin that implements the Kubernetes NetworkPolicy spec, and set `networkPolicy.enabled` to true. + +If NetworkPolicy is enabled for Prometheus' scrape targets, you may also need to manually create a networkpolicy which allows it. diff --git a/sregym/observer/prometheus/prometheus/charts/alertmanager/.helmignore b/sregym/observer/prometheus/prometheus/charts/alertmanager/.helmignore new file mode 100644 index 0000000..4685b40 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/alertmanager/.helmignore @@ -0,0 +1,25 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ + +unittests/ diff --git a/sregym/observer/prometheus/prometheus/charts/alertmanager/Chart.yaml b/sregym/observer/prometheus/prometheus/charts/alertmanager/Chart.yaml new file mode 100644 index 0000000..02f6bb7 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/alertmanager/Chart.yaml @@ -0,0 +1,23 @@ +apiVersion: v2 +name: alertmanager +description: The Alertmanager handles alerts sent by client applications such as the Prometheus server. +home: https://prometheus.io/ +icon: https://raw.githubusercontent.com/prometheus/prometheus.github.io/master/assets/prometheus_logo-cb55bb5c346.png +sources: + - https://github.com/prometheus/alertmanager +type: application +version: 1.7.0 +appVersion: v0.26.0 +kubeVersion: ">=1.19.0-0" +keywords: + - monitoring +maintainers: + - name: monotek + email: monotek23@gmail.com + - name: naseemkullah + email: naseem@transit.app +annotations: + "artifacthub.io/license": Apache-2.0 + "artifacthub.io/links": | + - name: Chart Source + url: https://github.com/prometheus-community/helm-charts diff --git a/sregym/observer/prometheus/prometheus/charts/alertmanager/README.md b/sregym/observer/prometheus/prometheus/charts/alertmanager/README.md new file mode 100644 index 0000000..9513f3a --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/alertmanager/README.md @@ -0,0 +1,62 @@ +# Alertmanager + +As per [prometheus.io documentation](https://prometheus.io/docs/alerting/latest/alertmanager/): +> The Alertmanager handles alerts sent by client applications such as the +> Prometheus server. It takes care of deduplicating, grouping, and routing them +> to the correct receiver integration such as email, PagerDuty, or OpsGenie. It +> also takes care of silencing and inhibition of alerts. + +## Prerequisites + +Kubernetes 1.14+ + +## Get Repository Info + +```console +helm repo add prometheus-community https://prometheus-community.github.io/helm-charts +helm repo update +``` + +_See [`helm repo`](https://helm.sh/docs/helm/helm_repo/) for command documentation._ + +## Install Chart + +```console +helm install [RELEASE_NAME] prometheus-community/alertmanager +``` + +_See [configuration](#configuration) below._ + +_See [helm install](https://helm.sh/docs/helm/helm_install/) for command documentation._ + +## Uninstall Chart + +```console +helm uninstall [RELEASE_NAME] +``` + +This removes all the Kubernetes components associated with the chart and deletes the release. 
+ +_See [helm uninstall](https://helm.sh/docs/helm/helm_uninstall/) for command documentation._ + +## Upgrading Chart + +```console +helm upgrade [RELEASE_NAME] [CHART] --install +``` + +_See [helm upgrade](https://helm.sh/docs/helm/helm_upgrade/) for command documentation._ + +### To 1.0 + +The [configmap-reload](https://github.com/jimmidyson/configmap-reload) container was replaced by the [prometheus-config-reloader](https://github.com/prometheus-operator/prometheus-operator/tree/main/cmd/prometheus-config-reloader). +Extra command-line arguments specified via configmapReload.prometheus.extraArgs are not compatible and will break with the new prometheus-config-reloader; refer to the [sources](https://github.com/prometheus-operator/prometheus-operator/blob/main/cmd/prometheus-config-reloader/main.go) in order to make the appropriate adjustment to the extra command-line arguments. +The `networking.k8s.io/v1beta1` API is no longer supported. Use [`networking.k8s.io/v1`](https://kubernetes.io/docs/reference/using-api/deprecation-guide/#ingressclass-v122). + +## Configuration + +See [Customizing the Chart Before Installing](https://helm.sh/docs/intro/using_helm/#customizing-the-chart-before-installing). To see all configurable options with detailed comments, visit the chart's [values.yaml](./values.yaml), or run these configuration commands: + +```console +helm show values prometheus-community/alertmanager +``` diff --git a/sregym/observer/prometheus/prometheus/charts/alertmanager/ci/config-reload-values.yaml b/sregym/observer/prometheus/prometheus/charts/alertmanager/ci/config-reload-values.yaml new file mode 100644 index 0000000..bb310b2 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/alertmanager/ci/config-reload-values.yaml @@ -0,0 +1,2 @@ +configmapReload: + enabled: true diff --git a/sregym/observer/prometheus/prometheus/charts/alertmanager/templates/NOTES.txt b/sregym/observer/prometheus/prometheus/charts/alertmanager/templates/NOTES.txt new file mode 100644 index 0000000..bf34e7a --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/alertmanager/templates/NOTES.txt @@ -0,0 +1,21 @@ +1. Get the application URL by running these commands: +{{- if .Values.ingress.enabled }} +{{- range $host := .Values.ingress.hosts }} + {{- range .paths }} + http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }} + {{- end }} +{{- end }} +{{- else if contains "NodePort" .Values.service.type }} + export NODE_PORT=$(kubectl get --namespace {{ include "alertmanager.namespace" . }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "alertmanager.fullname" . }}) + export NODE_IP=$(kubectl get nodes --namespace {{ include "alertmanager.namespace" . }} -o jsonpath="{.items[0].status.addresses[0].address}") + echo http://$NODE_IP:$NODE_PORT +{{- else if contains "LoadBalancer" .Values.service.type }} + NOTE: It may take a few minutes for the LoadBalancer IP to be available. + You can watch the status of it by running 'kubectl get --namespace {{ include "alertmanager.namespace" . }} svc -w {{ include "alertmanager.fullname" . }}' + export SERVICE_IP=$(kubectl get svc --namespace {{ include "alertmanager.namespace" . }} {{ include "alertmanager.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}") + echo http://$SERVICE_IP:{{ .Values.service.port }} +{{- else if contains "ClusterIP" .Values.service.type }} + export POD_NAME=$(kubectl get pods --namespace {{ include "alertmanager.namespace" .
}} -l "app.kubernetes.io/name={{ include "alertmanager.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") + echo "Visit http://127.0.0.1:{{ .Values.service.port }} to use your application" + kubectl --namespace {{ include "alertmanager.namespace" . }} port-forward $POD_NAME {{ .Values.service.port }}:80 +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/alertmanager/templates/_helpers.tpl b/sregym/observer/prometheus/prometheus/charts/alertmanager/templates/_helpers.tpl new file mode 100644 index 0000000..397f442 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/alertmanager/templates/_helpers.tpl @@ -0,0 +1,92 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. +*/}} +{{- define "alertmanager.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "alertmanager.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "alertmanager.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "alertmanager.labels" -}} +helm.sh/chart: {{ include "alertmanager.chart" . }} +{{ include "alertmanager.selectorLabels" . }} +{{- with .Chart.AppVersion }} +app.kubernetes.io/version: {{ . | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "alertmanager.selectorLabels" -}} +app.kubernetes.io/name: {{ include "alertmanager.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "alertmanager.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "alertmanager.fullname" .) 
.Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} + +{{/* +Define Ingress apiVersion +*/}} +{{- define "alertmanager.ingress.apiVersion" -}} +{{- printf "networking.k8s.io/v1" }} +{{- end }} + +{{/* +Define Pdb apiVersion +*/}} +{{- define "alertmanager.pdb.apiVersion" -}} +{{- if $.Capabilities.APIVersions.Has "policy/v1/PodDisruptionBudget" }} +{{- printf "policy/v1" }} +{{- else }} +{{- printf "policy/v1beta1" }} +{{- end }} +{{- end }} + +{{/* +Allow overriding alertmanager namespace +*/}} +{{- define "alertmanager.namespace" -}} +{{- if .Values.namespaceOverride -}} +{{- .Values.namespaceOverride -}} +{{- else -}} +{{- .Release.Namespace -}} +{{- end -}} +{{- end -}} diff --git a/sregym/observer/prometheus/prometheus/charts/alertmanager/templates/configmap.yaml b/sregym/observer/prometheus/prometheus/charts/alertmanager/templates/configmap.yaml new file mode 100644 index 0000000..71c955e --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/alertmanager/templates/configmap.yaml @@ -0,0 +1,21 @@ +{{- if .Values.config.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "alertmanager.fullname" . }} + labels: + {{- include "alertmanager.labels" . | nindent 4 }} + {{- with .Values.configAnnotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} + namespace: {{ include "alertmanager.namespace" . }} +data: + alertmanager.yml: | + {{- $config := omit .Values.config "enabled" }} + {{- toYaml $config | default "{}" | nindent 4 }} + {{- range $key, $value := .Values.templates }} + {{ $key }}: |- + {{- $value | nindent 4 }} + {{- end }} +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/alertmanager/templates/ingress.yaml b/sregym/observer/prometheus/prometheus/charts/alertmanager/templates/ingress.yaml new file mode 100644 index 0000000..8394ef5 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/alertmanager/templates/ingress.yaml @@ -0,0 +1,44 @@ +{{- if .Values.ingress.enabled }} +{{- $fullName := include "alertmanager.fullname" . }} +{{- $svcPort := .Values.service.port }} +apiVersion: {{ include "alertmanager.ingress.apiVersion" . }} +kind: Ingress +metadata: + name: {{ $fullName }} + labels: + {{- include "alertmanager.labels" . | nindent 4 }} + {{- with .Values.ingress.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} + namespace: {{ include "alertmanager.namespace" . }} +spec: + {{- if .Values.ingress.className }} + ingressClassName: {{ .Values.ingress.className }} + {{- end }} + {{- if .Values.ingress.tls }} + tls: + {{- range .Values.ingress.tls }} + - hosts: + {{- range .hosts }} + - {{ . 
| quote }} + {{- end }} + secretName: {{ .secretName }} + {{- end }} + {{- end }} + rules: + {{- range .Values.ingress.hosts }} + - host: {{ .host | quote }} + http: + paths: + {{- range .paths }} + - path: {{ .path }} + pathType: {{ .pathType }} + backend: + service: + name: {{ $fullName }} + port: + number: {{ $svcPort }} + {{- end }} + {{- end }} +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/alertmanager/templates/ingressperreplica.yaml b/sregym/observer/prometheus/prometheus/charts/alertmanager/templates/ingressperreplica.yaml new file mode 100644 index 0000000..3e8077d --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/alertmanager/templates/ingressperreplica.yaml @@ -0,0 +1,56 @@ +{{- if and .Values.servicePerReplica.enabled .Values.ingressPerReplica.enabled }} +{{- $pathType := .Values.ingressPerReplica.pathType }} +{{- $count := .Values.replicaCount | int -}} +{{- $servicePort := .Values.service.port -}} +{{- $ingressValues := .Values.ingressPerReplica -}} +{{- $fullName := include "alertmanager.fullname" . }} +apiVersion: v1 +kind: List +metadata: + name: {{ $fullName }}-ingressperreplica + namespace: {{ include "alertmanager.namespace" . }} +items: +{{- range $i, $e := until $count }} + - kind: Ingress + apiVersion: {{ include "alertmanager.ingress.apiVersion" $ }} + metadata: + name: {{ $fullName }}-{{ $i }} + namespace: {{ include "alertmanager.namespace" $ }} + labels: + {{- include "alertmanager.labels" $ | nindent 8 }} + {{- if $ingressValues.labels }} +{{ toYaml $ingressValues.labels | indent 8 }} + {{- end }} + {{- if $ingressValues.annotations }} + annotations: +{{ toYaml $ingressValues.annotations | indent 8 }} + {{- end }} + spec: + {{- if $ingressValues.className }} + ingressClassName: {{ $ingressValues.className }} + {{- end }} + rules: + - host: {{ $ingressValues.hostPrefix }}-{{ $i }}.{{ $ingressValues.hostDomain }} + http: + paths: + {{- range $p := $ingressValues.paths }} + - path: {{ tpl $p $ }} + pathType: {{ $pathType }} + backend: + service: + name: {{ $fullName }}-{{ $i }} + port: + name: http + {{- end -}} + {{- if or $ingressValues.tlsSecretName $ingressValues.tlsSecretPerReplica.enabled }} + tls: + - hosts: + - {{ $ingressValues.hostPrefix }}-{{ $i }}.{{ $ingressValues.hostDomain }} + {{- if $ingressValues.tlsSecretPerReplica.enabled }} + secretName: {{ $ingressValues.tlsSecretPerReplica.prefix }}-{{ $i }} + {{- else }} + secretName: {{ $ingressValues.tlsSecretName }} + {{- end }} + {{- end }} +{{- end -}} +{{- end -}} diff --git a/sregym/observer/prometheus/prometheus/charts/alertmanager/templates/pdb.yaml b/sregym/observer/prometheus/prometheus/charts/alertmanager/templates/pdb.yaml new file mode 100644 index 0000000..83a4eb8 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/alertmanager/templates/pdb.yaml @@ -0,0 +1,14 @@ +{{- if .Values.podDisruptionBudget }} +apiVersion: {{ include "alertmanager.pdb.apiVersion" . }} +kind: PodDisruptionBudget +metadata: + name: {{ include "alertmanager.fullname" . }} + labels: + {{- include "alertmanager.labels" . | nindent 4 }} + namespace: {{ include "alertmanager.namespace" . }} +spec: + selector: + matchLabels: + {{- include "alertmanager.selectorLabels" . 
| nindent 6 }} + {{- toYaml .Values.podDisruptionBudget | nindent 2 }} +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/alertmanager/templates/serviceaccount.yaml b/sregym/observer/prometheus/prometheus/charts/alertmanager/templates/serviceaccount.yaml new file mode 100644 index 0000000..9e26e31 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/alertmanager/templates/serviceaccount.yaml @@ -0,0 +1,14 @@ +{{- if .Values.serviceAccount.create }} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "alertmanager.serviceAccountName" . }} + labels: + {{- include "alertmanager.labels" . | nindent 4 }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} + namespace: {{ include "alertmanager.namespace" . }} +automountServiceAccountToken: {{ .Values.automountServiceAccountToken }} +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/alertmanager/templates/serviceperreplica.yaml b/sregym/observer/prometheus/prometheus/charts/alertmanager/templates/serviceperreplica.yaml new file mode 100644 index 0000000..81f15c0 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/alertmanager/templates/serviceperreplica.yaml @@ -0,0 +1,44 @@ +{{- if and .Values.servicePerReplica.enabled }} +{{- $count := .Values.replicaCount | int -}} +{{- $serviceValues := .Values.servicePerReplica -}} +apiVersion: v1 +kind: List +metadata: + name: {{ include "alertmanager.fullname" . }}-serviceperreplica + namespace: {{ include "alertmanager.namespace" . }} +items: +{{- range $i, $e := until $count }} + - apiVersion: v1 + kind: Service + metadata: + name: {{ include "alertmanager.fullname" $ }}-{{ $i }} + namespace: {{ include "alertmanager.namespace" $ }} + labels: + {{- include "alertmanager.labels" $ | nindent 8 }} + {{- if $serviceValues.annotations }} + annotations: +{{ toYaml $serviceValues.annotations | indent 8 }} + {{- end }} + spec: + {{- if $serviceValues.clusterIP }} + clusterIP: {{ $serviceValues.clusterIP }} + {{- end }} + {{- if $serviceValues.loadBalancerSourceRanges }} + loadBalancerSourceRanges: + {{- range $cidr := $serviceValues.loadBalancerSourceRanges }} + - {{ $cidr }} + {{- end }} + {{- end }} + {{- if ne $serviceValues.type "ClusterIP" }} + externalTrafficPolicy: {{ $serviceValues.externalTrafficPolicy }} + {{- end }} + ports: + - name: http + port: {{ $.Values.service.port }} + targetPort: http + selector: + {{- include "alertmanager.selectorLabels" $ | nindent 8 }} + statefulset.kubernetes.io/pod-name: {{ include "alertmanager.fullname" $ }}-{{ $i }} + type: "{{ $serviceValues.type }}" +{{- end }} +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/alertmanager/templates/services.yaml b/sregym/observer/prometheus/prometheus/charts/alertmanager/templates/services.yaml new file mode 100644 index 0000000..fa1575f --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/alertmanager/templates/services.yaml @@ -0,0 +1,71 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "alertmanager.fullname" . }} + labels: + {{- include "alertmanager.labels" . | nindent 4 }} + {{- with .Values.service.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} + {{- with .Values.service.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} + namespace: {{ include "alertmanager.namespace" . }} +spec: + type: {{ .Values.service.type }} + {{- with .Values.service.loadBalancerIP }} + loadBalancerIP: {{ . 
}} + {{- end }} + {{- with .Values.service.loadBalancerSourceRanges }} + loadBalancerSourceRanges: + {{- range $cidr := . }} + - {{ $cidr }} + {{- end }} + {{- end }} + ports: + - port: {{ .Values.service.port }} + targetPort: http + protocol: TCP + name: http + {{- if (and (eq .Values.service.type "NodePort") .Values.service.nodePort) }} + nodePort: {{ .Values.service.nodePort }} + {{- end }} + {{- with .Values.service.extraPorts }} + {{- toYaml . | nindent 4 }} + {{- end }} + selector: + {{- include "alertmanager.selectorLabels" . | nindent 4 }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ include "alertmanager.fullname" . }}-headless + labels: + {{- include "alertmanager.labels" . | nindent 4 }} + {{- with .Values.service.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} + namespace: {{ include "alertmanager.namespace" . }} +spec: + clusterIP: None + ports: + - port: {{ .Values.service.port }} + targetPort: http + protocol: TCP + name: http + {{- if or (gt (int .Values.replicaCount) 1) (.Values.additionalPeers) }} + - port: {{ .Values.service.clusterPort }} + targetPort: clusterpeer-tcp + protocol: TCP + name: cluster-tcp + - port: {{ .Values.service.clusterPort }} + targetPort: clusterpeer-udp + protocol: UDP + name: cluster-udp + {{- end }} + {{- with .Values.service.extraPorts }} + {{- toYaml . | nindent 4 }} + {{- end }} + selector: + {{- include "alertmanager.selectorLabels" . | nindent 4 }} diff --git a/sregym/observer/prometheus/prometheus/charts/alertmanager/templates/statefulset.yaml b/sregym/observer/prometheus/prometheus/charts/alertmanager/templates/statefulset.yaml new file mode 100644 index 0000000..047ade5 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/alertmanager/templates/statefulset.yaml @@ -0,0 +1,247 @@ +{{- $svcClusterPort := .Values.service.clusterPort }} +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: {{ include "alertmanager.fullname" . }} + labels: + {{- include "alertmanager.labels" . | nindent 4 }} + {{- with .Values.statefulSet.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} + namespace: {{ include "alertmanager.namespace" . }} +spec: + replicas: {{ .Values.replicaCount }} + revisionHistoryLimit: {{ .Values.revisionHistoryLimit }} + selector: + matchLabels: + {{- include "alertmanager.selectorLabels" . | nindent 6 }} + serviceName: {{ include "alertmanager.fullname" . }}-headless + template: + metadata: + labels: + {{- include "alertmanager.selectorLabels" . | nindent 8 }} + {{- with .Values.podLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + annotations: + {{- if not .Values.configmapReload.enabled }} + checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }} + {{- end }} + {{- with .Values.podAnnotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + automountServiceAccountToken: {{ .Values.automountServiceAccountToken }} + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "alertmanager.serviceAccountName" . }} + {{- with .Values.dnsConfig }} + dnsConfig: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.hostAliases }} + hostAliases: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.schedulerName }} + schedulerName: {{ . 
}} + {{- end }} + {{- if or .Values.podAntiAffinity .Values.affinity }} + affinity: + {{- end }} + {{- with .Values.affinity }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if eq .Values.podAntiAffinity "hard" }} + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - topologyKey: {{ .Values.podAntiAffinityTopologyKey }} + labelSelector: + matchExpressions: + - {key: app.kubernetes.io/name, operator: In, values: [{{ include "alertmanager.name" . }}]} + {{- else if eq .Values.podAntiAffinity "soft" }} + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + topologyKey: {{ .Values.podAntiAffinityTopologyKey }} + labelSelector: + matchExpressions: + - {key: app.kubernetes.io/name, operator: In, values: [{{ include "alertmanager.name" . }}]} + {{- end }} + {{- with .Values.priorityClassName }} + priorityClassName: {{ . }} + {{- end }} + {{- with .Values.topologySpreadConstraints }} + topologySpreadConstraints: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + {{- with .Values.extraInitContainers }} + initContainers: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + {{- if .Values.configmapReload.enabled }} + - name: {{ .Chart.Name }}-{{ .Values.configmapReload.name }} + image: "{{ .Values.configmapReload.image.repository }}:{{ .Values.configmapReload.image.tag }}" + imagePullPolicy: "{{ .Values.configmapReload.image.pullPolicy }}" + {{- with .Values.configmapReload.extraEnv }} + env: + {{- toYaml . | nindent 12 }} + {{- end }} + args: + {{- if and (hasKey .Values.configmapReload.extraArgs "config-file" | not) (hasKey .Values.configmapReload.extraArgs "watched-dir" | not) }} + - --watched-dir=/etc/alertmanager + {{- end }} + {{- if not (hasKey .Values.configmapReload.extraArgs "reload-url") }} + - --reload-url=http://127.0.0.1:9093/-/reload + {{- end }} + {{- range $key, $value := .Values.configmapReload.extraArgs }} + - --{{ $key }}={{ $value }} + {{- end }} + resources: + {{- toYaml .Values.configmapReload.resources | nindent 12 }} + {{- with .Values.configmapReload.containerPort }} + ports: + - containerPort: {{ . }} + {{- end }} + {{- with .Values.configmapReload.securityContext }} + securityContext: + {{- toYaml . | nindent 12 }} + {{- end }} + volumeMounts: + - name: config + mountPath: /etc/alertmanager + {{- if .Values.configmapReload.extraVolumeMounts }} + {{- toYaml .Values.configmapReload.extraVolumeMounts | nindent 12 }} + {{- end }} + {{- end }} + - name: {{ .Chart.Name }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + env: + - name: POD_IP + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.podIP + {{- if .Values.extraEnv }} + {{- toYaml .Values.extraEnv | nindent 12 }} + {{- end }} + {{- with .Values.command }} + command: + {{- toYaml . 
| nindent 12 }} + {{- end }} + args: + - --storage.path=/alertmanager + {{- if not (hasKey .Values.extraArgs "config.file") }} + - --config.file=/etc/alertmanager/alertmanager.yml + {{- end }} + {{- if or (gt (int .Values.replicaCount) 1) (.Values.additionalPeers) }} + - --cluster.advertise-address=[$(POD_IP)]:{{ $svcClusterPort }} + - --cluster.listen-address=0.0.0.0:{{ $svcClusterPort }} + {{- end }} + {{- if gt (int .Values.replicaCount) 1}} + {{- $fullName := include "alertmanager.fullname" . }} + {{- range $i := until (int .Values.replicaCount) }} + - --cluster.peer={{ $fullName }}-{{ $i }}.{{ $fullName }}-headless:{{ $svcClusterPort }} + {{- end }} + {{- end }} + {{- if .Values.additionalPeers }} + {{- range $item := .Values.additionalPeers }} + - --cluster.peer={{ $item }} + {{- end }} + {{- end }} + {{- range $key, $value := .Values.extraArgs }} + - --{{ $key }}={{ $value }} + {{- end }} + ports: + - name: http + containerPort: 9093 + protocol: TCP + {{- if or (gt (int .Values.replicaCount) 1) (.Values.additionalPeers) }} + - name: clusterpeer-tcp + containerPort: {{ $svcClusterPort }} + protocol: TCP + - name: clusterpeer-udp + containerPort: {{ $svcClusterPort }} + protocol: UDP + {{- end }} + livenessProbe: + {{- toYaml .Values.livenessProbe | nindent 12 }} + readinessProbe: + {{- toYaml .Values.readinessProbe | nindent 12 }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumeMounts: + {{- if .Values.config.enabled }} + - name: config + mountPath: /etc/alertmanager + {{- end }} + {{- range .Values.extraSecretMounts }} + - name: {{ .name }} + mountPath: {{ .mountPath }} + subPath: {{ .subPath }} + readOnly: {{ .readOnly }} + {{- end }} + - name: storage + mountPath: /alertmanager + {{- if .Values.extraVolumeMounts }} + {{- toYaml .Values.extraVolumeMounts | nindent 12 }} + {{- end }} + {{- with .Values.extraContainers }} + {{- toYaml . | nindent 8 }} + {{- end }} + volumes: + {{- if .Values.config.enabled }} + - name: config + configMap: + name: {{ include "alertmanager.fullname" . }} + {{- end }} + {{- range .Values.extraSecretMounts }} + - name: {{ .name }} + secret: + secretName: {{ .secretName }} + {{- with .optional }} + optional: {{ . }} + {{- end }} + {{- end }} + {{- if .Values.extraVolumes }} + {{- toYaml .Values.extraVolumes | nindent 8 }} + {{- end }} + {{- if .Values.persistence.enabled }} + volumeClaimTemplates: + - metadata: + name: storage + spec: + accessModes: + {{- toYaml .Values.persistence.accessModes | nindent 10 }} + resources: + requests: + storage: {{ .Values.persistence.size }} + {{- if .Values.persistence.storageClass }} + {{- if (eq "-" .Values.persistence.storageClass) }} + storageClassName: "" + {{- else }} + storageClassName: {{ .Values.persistence.storageClass }} + {{- end }} + {{- end }} + {{- else }} + - name: storage + emptyDir: {} + {{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/alertmanager/templates/tests/test-connection.yaml b/sregym/observer/prometheus/prometheus/charts/alertmanager/templates/tests/test-connection.yaml new file mode 100644 index 0000000..ace5f7d --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/alertmanager/templates/tests/test-connection.yaml @@ -0,0 +1,20 @@ +{{- if .Values.testFramework.enabled }} +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "alertmanager.fullname" . }}-test-connection" + labels: + {{- include "alertmanager.labels" . | nindent 4 }} + {{- with .Values.testFramework.annotations }} + annotations: + {{- toYaml . 
| nindent 4 }} + {{- end }} + namespace: {{ include "alertmanager.namespace" . }} +spec: + containers: + - name: wget + image: busybox + command: ['wget'] + args: ['{{ include "alertmanager.fullname" . }}:{{ .Values.service.port }}'] + restartPolicy: Never +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/alertmanager/unittests/__snapshot__/ingress_test.yaml.snap b/sregym/observer/prometheus/prometheus/charts/alertmanager/unittests/__snapshot__/ingress_test.yaml.snap new file mode 100644 index 0000000..2acdf20 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/alertmanager/unittests/__snapshot__/ingress_test.yaml.snap @@ -0,0 +1,25 @@ +should match snapshot of default values: + 1: | + apiVersion: networking.k8s.io/v1 + kind: Ingress + metadata: + labels: + app.kubernetes.io/instance: RELEASE-NAME + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/name: alertmanager + app.kubernetes.io/version: 1.0.0 + helm.sh/chart: alertmanager-1.0.0 + name: RELEASE-NAME-alertmanager + spec: + ingressClassName: nginx-test + rules: + - host: alertmanager.domain.com + http: + paths: + - backend: + service: + name: RELEASE-NAME-alertmanager + port: + number: 9093 + path: / + pathType: ImplementationSpecific diff --git a/sregym/observer/prometheus/prometheus/charts/alertmanager/unittests/ingress_test.yaml b/sregym/observer/prometheus/prometheus/charts/alertmanager/unittests/ingress_test.yaml new file mode 100644 index 0000000..82f03b0 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/alertmanager/unittests/ingress_test.yaml @@ -0,0 +1,43 @@ +suite: test ingress +templates: + - ingress.yaml +tests: + - it: should be empty if ingress is not enabled + asserts: + - hasDocuments: + count: 0 + - it: should have apiVersion networking.k8s.io/v1 for k8s >= 1.19 + set: + ingress.enabled: true + capabilities: + majorVersion: 1 + minorVersion: 19 + asserts: + - hasDocuments: + count: 1 + - isKind: + of: Ingress + - isAPIVersion: + of: networking.k8s.io/v1 + - it: should have an ingressClassName for k8s >= 1.19 + set: + ingress.enabled: true + ingress.className: nginx-test + capabilities: + majorVersion: 1 + minorVersion: 19 + asserts: + - hasDocuments: + count: 1 + - equal: + path: spec.ingressClassName + value: nginx-test + - it: should match snapshot of default values + set: + ingress.enabled: true + ingress.className: nginx-test + chart: + version: 1.0.0 + appVersion: 1.0.0 + asserts: + - matchSnapshot: { } diff --git a/sregym/observer/prometheus/prometheus/charts/alertmanager/values.schema.json b/sregym/observer/prometheus/prometheus/charts/alertmanager/values.schema.json new file mode 100644 index 0000000..612ce21 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/alertmanager/values.schema.json @@ -0,0 +1,915 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "title": "alertmanager", + "description": "The Alertmanager handles alerts sent by client applications such as the Prometheus server.", + "type": "object", + "required": [ + "replicaCount", + "image", + "serviceAccount", + "service", + "persistence", + "config" + ], + "definitions": { + "image": { + "description": "Container image parameters.", + "type": "object", + "required": ["repository"], + "additionalProperties": false, + "properties": { + "repository": { + "description": "Image repository. Path to the image with registry(quay.io) or without(prometheus/alertmanager) for docker.io.", + "type": "string" + }, + "pullPolicy": { + "description": "Image pull policy. 
Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. Cannot be updated.", + "type": "string", + "enum": [ + "Never", + "IfNotPresent", + "Always" + ], + "default": "IfNotPresent" + }, + "tag": { + "description": "Use chart appVersion by default.", + "type": "string", + "default": "" + } + } + }, + "resources": { + "description": "Resource limits and requests for the Container.", + "type": "object", + "properties": { + "limits": { + "description": "Resource limits for the Container.", + "type": "object", + "properties": { + "cpu": { + "description": "CPU request for the Container.", + "type": "string" + }, + "memory": { + "description": "Memory request for the Container.", + "type": "string" + } + } + }, + "requests": { + "description": "Resource requests for the Container.", + "type": "object", + "properties": { + "cpu": { + "description": "CPU request for the Container.", + "type": "string" + }, + "memory": { + "description": "Memory request for the Container.", + "type": "string" + } + } + } + } + }, + "securityContext": { + "description": "Security context for the container.", + "type": "object", + "properties": { + "capabilities": { + "description": "Specifies the capabilities to be dropped by the container.", + "type": "object", + "properties": { + "drop": { + "description": "List of capabilities to be dropped.", + "type": "array", + "items": { + "type": "string" + } + } + } + }, + "readOnlyRootFilesystem": { + "description": "Specifies whether the root file system should be mounted as read-only.", + "type": "boolean" + }, + "runAsUser": { + "description": "Specifies the UID (User ID) to run the container as.", + "type": "integer" + }, + "runAsNonRoot": { + "description": "Specifies whether to run the container as a non-root user.", + "type": "boolean" + }, + "runAsGroup": { + "description": "Specifies the GID (Group ID) to run the container as.", + "type": "integer" + } + } + }, + "volumeMounts": { + "description": "List of volume mounts for the Container.", + "type": "array", + "items": { + "description": "Volume mounts for the Container.", + "type": "object", + "required": ["name", "mountPath"], + "properties": { + "name": { + "description": "The name of the volume to mount.", + "type": "string" + }, + "mountPath": { + "description": "The mount path for the volume.", + "type": "string" + }, + "readOnly": { + "description": "Specifies if the volume should be mounted in read-only mode.", + "type": "boolean" + } + } + } + }, + "env": { + "description": "List of environment variables for the Container.", + "type": "array", + "items": { + "description": "Environment variables for the Container.", + "type": "object", + "required": ["name"], + "properties": { + "name": { + "description": "The name of the environment variable.", + "type": "string" + }, + "value": { + "description": "The value of the environment variable.", + "type": "string" + } + } + } + }, + "config": { + "description": "https://prometheus.io/docs/alerting/latest/configuration/", + "duration": { + "type": "string", + "pattern": "^((([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?|0)$" + }, + "labelname": { + "type": "string", + "pattern": "^[a-zA-Z_][a-zA-Z0-9_]*$|^...$" + }, + "route": { + "description": "Alert routing configuration.", + "type": "object", + "properties": { + "receiver": { + "description": "The default receiver to send alerts to.", + "type": "string" + }, + "group_by": { + "description": "The labels by which incoming alerts are grouped together.", 
+ "type": "array", + "items": { + "type": "string", + "$ref": "#/definitions/config/labelname" + } + }, + "continue": { + "description": "Whether an alert should continue matching subsequent sibling nodes.", + "type": "boolean", + "default": false + }, + "matchers": { + "description": "A list of matchers that an alert has to fulfill to match the node.", + "type": "array", + "items": { + "type": "string" + } + }, + "group_wait": { + "description": "How long to initially wait to send a notification for a group of alerts.", + "$ref": "#/definitions/config/duration" + }, + "group_interval": { + "description": "How long to wait before sending a notification about new alerts that are added to a group of alerts for which an initial notification has already been sent.", + "$ref": "#/definitions/config/duration" + }, + "repeat_interval": { + "description": "How long to wait before sending a notification again if it has already been sent successfully for an alert.", + "$ref": "#/definitions/config/duration" + }, + "mute_time_intervals": { + "description": "Times when the route should be muted.", + "type": "array", + "items": { + "type": "string" + } + }, + "active_time_intervals": { + "description": "Times when the route should be active.", + "type": "array", + "items": { + "type": "string" + } + }, + "routes": { + "description": "Zero or more child routes.", + "type": "array", + "items": { + "type": "object", + "$ref": "#/definitions/config/route" + } + } + } + } + } + }, + "properties": { + "replicaCount": { + "description": "Number of desired pods.", + "type": "integer", + "default": 1, + "minimum": 0 + }, + "image": { + "description": "Container image parameters.", + "$ref": "#/definitions/image" + }, + "extraArgs": { + "description": "Additional alertmanager container arguments. 
Use args without '--', only 'key: value' syntax.", + "type": "object", + "default": {} + }, + "extraSecretMounts": { + "description": "Additional Alertmanager Secret mounts.", + "type": "array", + "default": [], + "items": { + "type": "object", + "required": ["name", "mountPath", "secretName"], + "properties": { + "name": { + "type": "string" + }, + "mountPath": { + "type": "string" + }, + "subPath": { + "type": "string", + "default": "" + }, + "secretName": { + "type": "string" + }, + "readOnly": { + "type": "boolean", + "default": false + } + } + } + }, + "imagePullSecrets": { + "description": "The property allows you to configure multiple image pull secrets.", + "type": "array", + "default": [], + "items": { + "type": "object", + "required": ["name"], + "properties": { + "name": { + "description": "Specifies the Secret name of the image pull secret.", + "type": "string" + } + } + } + }, + "nameOverride": { + "description": "Override value for the name of the Helm chart.", + "type": "string", + "default": "" + }, + "fullnameOverride": { + "description": "Override value for the fully qualified app name.", + "type": "string", + "default": "" + }, + "namespaceOverride": { + "description": "Override deployment namespace.", + "type": "string", + "default": "" + }, + "automountServiceAccountToken": { + "description": "Specifies whether to automatically mount the ServiceAccount token into the Pod's filesystem.", + "type": "boolean", + "default": true + }, + "serviceAccount": { + "description": "Contains properties related to the service account configuration.", + "type": "object", + "required": ["create"], + "properties": { + "create": { + "description": "Specifies whether a service account should be created.", + "type": "boolean", + "default": true + }, + "annotations": { + "description": "Annotations to add to the service account.", + "type": "object", + "default": {} + }, + "name": { + "description": "The name of the service account to use. 
If not set and create is true, a name is generated using the fullname template.", + "type": "string", + "default": "" + } + } + }, + "schedulerName": { + "description": "Sets the schedulerName in the alertmanager pod.", + "type": "string", + "default": "" + }, + "priorityClassName": { + "description": "Sets the priorityClassName in the alertmanager pod.", + "type": "string", + "default": "" + }, + "podSecurityContext": { + "description": "Pod security context configuration.", + "type": "object", + "properties": { + "fsGroup": { + "description": "The fsGroup value for the pod's security context.", + "type": "integer", + "default": 65534 + }, + "runAsUser": { + "description": "The UID to run the pod's containers as.", + "type": "integer" + }, + "runAsGroup": { + "description": "The GID to run the pod's containers as.", + "type": "integer" + } + } + }, + "dnsConfig": { + "description": "DNS configuration for the pod.", + "type": "object", + "properties": { + "nameservers": { + "description": "List of DNS server IP addresses.", + "type": "array", + "items": { + "type": "string" + } + }, + "searches": { + "description": "List of DNS search domains.", + "type": "array", + "items": { + "type": "string" + } + }, + "options": { + "description": "List of DNS options.", + "type": "array", + "items": { + "description": "DNS options.", + "type": "object", + "required": ["name"], + "properties": { + "name": { + "description": "The name of the DNS option.", + "type": "string" + }, + "value": { + "description": "The value of the DNS option.", + "type": "string" + } + } + } + } + } + }, + "hostAliases": { + "description": "List of host aliases.", + "type": "array", + "items": { + "description": "Host aliases configuration.", + "type": "object", + "required": ["ip", "hostnames"], + "properties": { + "ip": { + "description": "IP address associated with the host alias.", + "type": "string" + }, + "hostnames": { + "description": "List of hostnames associated with the IP address.", + "type": "array", + "items": { + "type": "string" + } + } + } + } + }, + "securityContext": { + "description": "Security context for the container.", + "$ref": "#/definitions/securityContext" + }, + "additionalPeers": { + "description": "Additional peers for a alertmanager.", + "type": "array", + "items": { + "type": "string" + } + }, + "extraInitContainers": { + "description": "Additional InitContainers to initialize the pod.", + "type": "array", + "default": [], + "items": { + "required": ["name", "image"], + "properties": { + "name": { + "description": "The name of the InitContainer.", + "type": "string" + }, + "image": { + "description": "The container image to use for the InitContainer.", + "type": "string" + }, + "pullPolicy": { + "description": "Image pull policy. Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. 
Cannot be updated.", + "type": "string", + "enum": [ + "Never", + "IfNotPresent", + "Always" + ], + "default": "IfNotPresent" + }, + "command": { + "description": "The command to run in the InitContainer.", + "type": "array", + "items": { + "type": "string" + } + }, + "args": { + "description": "Additional command arguments for the InitContainer.", + "type": "array", + "items": { + "type": "string" + } + }, + "ports": { + "description": "List of ports to expose from the container.", + "type": "array", + "items": { + "type": "object" + } + }, + "env": { + "description": "List of environment variables for the InitContainer.", + "$ref": "#/definitions/env" + }, + "envFrom": { + "description": "List of sources to populate environment variables in the container.", + "type": "array", + "items": { + "type": "object" + } + }, + "volumeMounts": { + "description": "List of volume mounts for the InitContainer.", + "$ref": "#/definitions/volumeMounts" + }, + "resources": { + "description": "Resource requirements for the InitContainer.", + "$ref": "#/definitions/resources" + }, + "securityContext": { + "$ref": "#/definitions/securityContext", + "description": "The security context for the InitContainer." + } + } + } + }, + "extraContainers": { + "description": "Additional containers to add to the stateful set.", + "type": "array", + "default": [], + "items": { + "required": ["name", "image"], + "properties": { + "name": { + "description": "The name of the InitContainer.", + "type": "string" + }, + "image": { + "description": "The container image to use for the InitContainer.", + "type": "string" + }, + "pullPolicy": { + "description": "Image pull policy. Defaults to Always if :latest tag is specified, or IfNotPresent otherwise. Cannot be updated.", + "type": "string", + "enum": [ + "Never", + "IfNotPresent", + "Always" + ], + "default": "IfNotPresent" + }, + "command": { + "description": "The command to run in the InitContainer.", + "type": "array", + "items": { + "type": "string" + } + }, + "args": { + "description": "Additional command arguments for the InitContainer.", + "type": "array", + "items": { + "type": "string" + } + }, + "ports": { + "description": "List of ports to expose from the container.", + "type": "array", + "items": { + "type": "object" + } + }, + "env": { + "description": "List of environment variables for the InitContainer.", + "$ref": "#/definitions/env" + }, + "envFrom": { + "description": "List of sources to populate environment variables in the container.", + "type": "array", + "items": { + "type": "object" + } + }, + "volumeMounts": { + "description": "List of volume mounts for the InitContainer.", + "$ref": "#/definitions/volumeMounts" + }, + "resources": { + "description": "Resource requirements for the InitContainer.", + "$ref": "#/definitions/resources" + }, + "securityContext": { + "$ref": "#/definitions/securityContext", + "description": "The security context for the InitContainer." 
+ } + } + } + }, + "resources": { + "description": "Resource limits and requests for the pod.", + "$ref": "#/definitions/resources" + }, + "livenessProbe": { + "description": "Liveness probe configuration.", + "type": "object" + }, + "readinessProbe": { + "description": "Readiness probe configuration.", + "type": "object" + }, + "service": { + "description": "Service configuration.", + "type": "object", + "required": ["type", "port"], + "properties": { + "annotations": { + "description": "Annotations to add to the service.", + "type": "object" + }, + "type": { + "description": "Service type.", + "type": "string" + }, + "port": { + "description": "Port number for the service.", + "type": "integer" + }, + "clusterPort": { + "description": "Port number for the cluster.", + "type": "integer" + }, + "loadBalancerIP": { + "description": "External IP to assign when the service type is LoadBalancer.", + "type": "string" + }, + "loadBalancerSourceRanges": { + "description": "IP ranges to allow access to the loadBalancerIP.", + "type": "array", + "items": { + "type": "string" + } + }, + "nodePort": { + "description": "Specific nodePort to force when service type is NodePort.", + "type": "integer" + } + } + }, + "ingress": { + "description": "Ingress configuration.", + "type": "object", + "properties": { + "enabled": { + "description": "Indicates if Ingress is enabled.", + "type": "boolean" + }, + "className": { + "description": "Ingress class name.", + "type": "string" + }, + "annotations": { + "description": "Annotations to add to the Ingress.", + "type": "object" + }, + "hosts": { + "description": "Host and path configuration for the Ingress.", + "type": "array", + "items": { + "type": "object", + "properties": { + "host": { + "description": "Host name for the Ingress.", + "type": "string" + }, + "paths": { + "description": "Path configuration for the Ingress.", + "type": "array", + "items": { + "type": "object", + "properties": { + "path": { + "description": "Path for the Ingress.", + "type": "string" + }, + "pathType": { + "description": "Path type for the Ingress.", + "type": "string" + } + } + } + } + } + } + }, + "tls": { + "description": "TLS configuration for the Ingress.", + "type": "array", + "items": { + "type": "object", + "properties": { + "secretName": { + "description": "Name of the secret for TLS.", + "type": "string" + }, + "hosts": { + "description": "Host names for the TLS configuration.", + "type": "array", + "items": { + "type": "string" + } + } + } + } + } + } + }, + "nodeSelector": { + "description": "Node selector for pod assignment.", + "type": "object" + }, + "tolerations": { + "description": "Tolerations for pod assignment.", + "type": "array" + }, + "affinity": { + "description": "Affinity rules for pod assignment.", + "type": "object" + }, + "podAntiAffinity": { + "description": "Pod anti-affinity configuration.", + "type": "string", + "enum": ["", "soft", "hard"], + "default": "" + }, + "podAntiAffinityTopologyKey": { + "description": "Topology key to use for pod anti-affinity.", + "type": "string" + }, + "topologySpreadConstraints": { + "description": "Topology spread constraints for pod assignment.", + "type": "array", + "items": { + "type": "object", + "required": ["maxSkew", "topologyKey", "whenUnsatisfiable", "labelSelector"], + "properties": { + "maxSkew": { + "type": "integer" + }, + "topologyKey": { + "type": "string" + }, + "whenUnsatisfiable": { + "type": "string", + "enum": ["DoNotSchedule", "ScheduleAnyway"] + }, + "labelSelector": { + "type": "object", + 
"required": ["matchLabels"], + "properties": { + "matchLabels": { + "type": "object" + } + } + } + } + } + }, + "statefulSet": { + "description": "StatefulSet configuration for managing pods.", + "type": "object", + "properties": { + "annotations": { + "type": "object" + } + } + }, + "podAnnotations": { + "description": "Annotations to add to the pods.", + "type": "object" + }, + "podLabels": { + "description": "Labels to add to the pods.", + "type": "object" + }, + "podDisruptionBudget": { + "description": "Pod disruption budget configuration.", + "type": "object", + "properties": { + "maxUnavailable": { + "type": "integer" + }, + "minAvailable": { + "type": "integer" + } + } + }, + "command": { + "description": "The command to be executed in the container.", + "type": "array", + "items": { + "type": "string" + } + }, + "persistence": { + "description": "Persistence configuration for storing data.", + "type": "object", + "required": ["enabled", "size"], + "properties": { + "enabled": { + "type": "boolean" + }, + "storageClass": { + "type": "string" + }, + "accessModes": { + "type": "array", + "items": { + "type": "string" + } + }, + "size": { + "type": "string" + } + } + }, + "configAnnotations": { + "description": "Annotations to be added to the Alertmanager configuration.", + "type": "object" + }, + "config": { + "description": "Alertmanager configuration.", + "type": "object", + "properties": { + "enabled": { + "description": "Whether to create alermanager configmap or not.", + "type": "boolean" + }, + "global": { + "description": "Global configuration options.", + "type": "object" + }, + "templates": { + "description": "Alertmanager template files.", + "type": "array", + "items": { + "type": "string" + } + }, + "receivers": { + "description": "Alert receivers configuration.", + "type": "array", + "items": { + "type": "object", + "required": ["name"], + "properties": { + "name": { + "description": "The unique name of the receiver.", + "type": "string" + } + } + } + }, + "route": { + "description": "Alert routing configuration.", + "type": "object", + "$ref": "#/definitions/config/route" + } + } + }, + "configmapReload": { + "description": "Monitors ConfigMap changes and POSTs to a URL.", + "type": "object", + "properties": { + "enabled": { + "description": "Specifies whether the configmap-reload container should be deployed.", + "type": "boolean", + "default": false + }, + "name": { + "description": "The name of the configmap-reload container.", + "type": "string" + }, + "image": { + "description": "The container image for the configmap-reload container.", + "$ref": "#/definitions/image" + }, + "containerPort": { + "description": "Port number for the configmap-reload container.", + "type": "integer" + }, + "resources": { + "description": "Resource requests and limits for the configmap-reload container.", + "$ref": "#/definitions/resources" + } + } + }, + "templates": { + "description": "Custom templates used by Alertmanager.", + "type": "object" + }, + "extraVolumeMounts": { + "description": "List of volume mounts for the Container.", + "$ref": "#/definitions/volumeMounts" + }, + "extraVolumes": { + "description": "Additional volumes to be mounted in the Alertmanager pod.", + "type": "array", + "default": [], + "items": { + "type": "object", + "required": ["name"], + "properties": { + "name": { + "type": "string" + } + } + } + }, + "extraEnv": { + "description": "List of environment variables for the Container.", + "$ref": "#/definitions/env" + }, + "testFramework": { + "description": 
"Configuration for the test Pod.", + "type": "object", + "properties": { + "enabled": { + "description": "Specifies whether the test Pod is enabled.", + "type": "boolean", + "default": false + }, + "annotations": { + "description": "Annotations to be added to the test Pod.", + "type": "object" + } + } + } + } +} diff --git a/sregym/observer/prometheus/prometheus/charts/alertmanager/values.yaml b/sregym/observer/prometheus/prometheus/charts/alertmanager/values.yaml new file mode 100644 index 0000000..de1415e --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/alertmanager/values.yaml @@ -0,0 +1,363 @@ +# yaml-language-server: $schema=values.schema.json +# Default values for alertmanager. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +replicaCount: 1 + +# Number of old history to retain to allow rollback +# Default Kubernetes value is set to 10 +revisionHistoryLimit: 10 + +image: + repository: quay.io/prometheus/alertmanager + pullPolicy: IfNotPresent + # Overrides the image tag whose default is the chart appVersion. + tag: "" + +extraArgs: {} + +## Additional Alertmanager Secret mounts +# Defines additional mounts with secrets. Secrets must be manually created in the namespace. +extraSecretMounts: [] + # - name: secret-files + # mountPath: /etc/secrets + # subPath: "" + # secretName: alertmanager-secret-files + # readOnly: true + +imagePullSecrets: [] +nameOverride: "" +fullnameOverride: "" +## namespaceOverride overrides the namespace which the resources will be deployed in +namespaceOverride: "" + +automountServiceAccountToken: true + +serviceAccount: + # Specifies whether a service account should be created + create: true + # Annotations to add to the service account + annotations: {} + # The name of the service account to use. + # If not set and create is true, a name is generated using the fullname template + name: "" + +# Sets priorityClassName in alertmanager pod +priorityClassName: "" + +# Sets schedulerName in alertmanager pod +schedulerName: "" + +podSecurityContext: + fsGroup: 65534 +dnsConfig: {} + # nameservers: + # - 1.2.3.4 + # searches: + # - ns1.svc.cluster-domain.example + # - my.dns.search.suffix + # options: + # - name: ndots + # value: "2" + # - name: edns0 +hostAliases: [] + # - ip: "127.0.0.1" + # hostnames: + # - "foo.local" + # - "bar.local" + # - ip: "10.1.2.3" + # hostnames: + # - "foo.remote" + # - "bar.remote" +securityContext: + # capabilities: + # drop: + # - ALL + # readOnlyRootFilesystem: true + runAsUser: 65534 + runAsNonRoot: true + runAsGroup: 65534 + +additionalPeers: [] + +## Additional InitContainers to initialize the pod +## +extraInitContainers: [] + +## Additional containers to add to the stateful set. This will allow to setup sidecarContainers like a proxy to integrate +## alertmanager with an external tool like teams that has not direct integration. +## +extraContainers: [] + +livenessProbe: + httpGet: + path: / + port: http + +readinessProbe: + httpGet: + path: / + port: http + +service: + annotations: {} + labels: {} + type: ClusterIP + port: 9093 + clusterPort: 9094 + loadBalancerIP: "" # Assign ext IP when Service type is LoadBalancer + loadBalancerSourceRanges: [] # Only allow access to loadBalancerIP from these IPs + # if you want to force a specific nodePort. 
Must be use with service.type=NodePort + # nodePort: + + # Optionally specify extra list of additional ports exposed on both services + extraPorts: [] + +# Configuration for creating a separate Service for each statefulset Alertmanager replica +# +servicePerReplica: + enabled: false + annotations: {} + + # Loadbalancer source IP ranges + # Only used if servicePerReplica.type is "LoadBalancer" + loadBalancerSourceRanges: [] + + # Denotes if this Service desires to route external traffic to node-local or cluster-wide endpoints + # + externalTrafficPolicy: Cluster + + # Service type + # + type: ClusterIP + +ingress: + enabled: false + className: "" + annotations: {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + hosts: + - host: alertmanager.domain.com + paths: + - path: / + pathType: ImplementationSpecific + tls: [] + # - secretName: chart-example-tls + # hosts: + # - alertmanager.domain.com + +# Configuration for creating an Ingress that will map to each Alertmanager replica service +# alertmanager.servicePerReplica must be enabled +# +ingressPerReplica: + enabled: false + + # className for the ingresses + # + className: "" + + annotations: {} + labels: {} + + # Final form of the hostname for each per replica ingress is + # {{ ingressPerReplica.hostPrefix }}-{{ $replicaNumber }}.{{ ingressPerReplica.hostDomain }} + # + # Prefix for the per replica ingress that will have `-$replicaNumber` + # appended to the end + hostPrefix: "alertmanager" + # Domain that will be used for the per replica ingress + hostDomain: "domain.com" + + # Paths to use for ingress rules + # + paths: + - / + + # PathType for ingress rules + # + pathType: ImplementationSpecific + + # Secret name containing the TLS certificate for alertmanager per replica ingress + # Secret must be manually created in the namespace + tlsSecretName: "" + + # Separated secret for each per replica Ingress. Can be used together with cert-manager + # + tlsSecretPerReplica: + enabled: false + # Final form of the secret for each per replica ingress is + # {{ tlsSecretPerReplica.prefix }}-{{ $replicaNumber }} + # + prefix: "alertmanager" + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 10m + # memory: 32Mi + +nodeSelector: {} + +tolerations: [] + +affinity: {} + +## Pod anti-affinity can prevent the scheduler from placing Alertmanager replicas on the same node. +## The default value "soft" means that the scheduler should *prefer* to not schedule two replica pods onto the same node but no guarantee is provided. +## The value "hard" means that the scheduler is *required* to not schedule two replica pods onto the same node. +## The value "" will disable pod anti-affinity so that no anti-affinity rules will be configured. +## +podAntiAffinity: "" + +## If anti-affinity is enabled sets the topologyKey to use for anti-affinity. +## This can be changed to, for example, failure-domain.beta.kubernetes.io/zone +## +podAntiAffinityTopologyKey: kubernetes.io/hostname + +## Topology spread constraints rely on node labels to identify the topology domain(s) that each Node is in. 
+## Ref: https://kubernetes.io/docs/concepts/workloads/pods/pod-topology-spread-constraints/ +topologySpreadConstraints: [] + # - maxSkew: 1 + # topologyKey: failure-domain.beta.kubernetes.io/zone + # whenUnsatisfiable: DoNotSchedule + # labelSelector: + # matchLabels: + # app.kubernetes.io/instance: alertmanager + +statefulSet: + annotations: {} + +podAnnotations: {} +podLabels: {} + +# Ref: https://kubernetes.io/docs/tasks/run-application/configure-pdb/ +podDisruptionBudget: {} + # maxUnavailable: 1 + # minAvailable: 1 + +command: [] + +persistence: + enabled: true + ## Persistent Volume Storage Class + ## If defined, storageClassName: + ## If set to "-", storageClassName: "", which disables dynamic provisioning + ## If undefined (the default) or set to null, no storageClassName spec is + ## set, choosing the default provisioner. + ## + # storageClass: "-" + accessModes: + - ReadWriteOnce + size: 50Mi + +configAnnotations: {} + ## For example if you want to provide private data from a secret vault + ## https://github.com/banzaicloud/bank-vaults/tree/main/charts/vault-secrets-webhook + ## P.s.: Add option `configMapMutation: true` for vault-secrets-webhook + # vault.security.banzaicloud.io/vault-role: "admin" + # vault.security.banzaicloud.io/vault-addr: "https://vault.vault.svc.cluster.local:8200" + # vault.security.banzaicloud.io/vault-skip-verify: "true" + # vault.security.banzaicloud.io/vault-path: "kubernetes" + ## Example for inject secret + # slack_api_url: '${vault:secret/data/slack-hook-alerts#URL}' + +config: + enabled: true + global: {} + # slack_api_url: '' + + templates: + - '/etc/alertmanager/*.tmpl' + + receivers: + - name: default-receiver + # slack_configs: + # - channel: '@you' + # send_resolved: true + + route: + group_wait: 10s + group_interval: 5m + receiver: default-receiver + repeat_interval: 3h + +## Monitors ConfigMap changes and POSTs to a URL +## Ref: https://github.com/prometheus-operator/prometheus-operator/tree/main/cmd/prometheus-config-reloader +## +configmapReload: + ## If false, the configmap-reload container will not be deployed + ## + enabled: false + + ## configmap-reload container name + ## + name: configmap-reload + + ## configmap-reload container image + ## + image: + repository: quay.io/prometheus-operator/prometheus-config-reloader + tag: v0.66.0 + pullPolicy: IfNotPresent + + # containerPort: 9533 + + ## configmap-reload resource requests and limits + ## Ref: http://kubernetes.io/docs/user-guide/compute-resources/ + ## + resources: {} + + extraArgs: {} + + ## Optionally specify extra list of additional volumeMounts + extraVolumeMounts: [] + # - name: extras + # mountPath: /usr/share/extras + # readOnly: true + + ## Optionally specify extra environment variables to add to alertmanager container + extraEnv: [] + # - name: FOO + # value: BAR + + securityContext: {} + # capabilities: + # drop: + # - ALL + # readOnlyRootFilesystem: true + # runAsUser: 65534 + # runAsNonRoot: true + # runAsGroup: 65534 + +templates: {} +# alertmanager.tmpl: |- + +## Optionally specify extra list of additional volumeMounts +extraVolumeMounts: [] + # - name: extras + # mountPath: /usr/share/extras + # readOnly: true + +## Optionally specify extra list of additional volumes +extraVolumes: [] + # - name: extras + # emptyDir: {} + +## Optionally specify extra environment variables to add to alertmanager container +extraEnv: [] + # - name: FOO + # value: BAR + +testFramework: + enabled: false + annotations: + "helm.sh/hook": test-success + # "helm.sh/hook-delete-policy": 
"before-hook-creation,hook-succeeded" diff --git a/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/.helmignore b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/.helmignore new file mode 100644 index 0000000..daebc7d --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/.helmignore @@ -0,0 +1,21 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj diff --git a/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/Chart.yaml b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/Chart.yaml new file mode 100644 index 0000000..b339545 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/Chart.yaml @@ -0,0 +1,26 @@ +apiVersion: v2 +name: kube-state-metrics +description: Install kube-state-metrics to generate and expose cluster-level metrics +keywords: +- metric +- monitoring +- prometheus +- kubernetes +type: application +version: 5.15.2 +appVersion: 2.3.0 +home: https://github.com/kubernetes/kube-state-metrics/ +sources: +- https://github.com/kubernetes/kube-state-metrics/ +maintainers: +- name: tariq1890 + email: tariq.ibrahim@mulesoft.com +- name: mrueg + email: manuel@rueg.eu +- name: dotdc + email: david@0xdc.me +annotations: + "artifacthub.io/license": Apache-2.0 + "artifacthub.io/links": | + - name: Chart Source + url: https://github.com/prometheus-community/helm-charts diff --git a/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/README.md b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/README.md new file mode 100644 index 0000000..6697236 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/README.md @@ -0,0 +1,85 @@ +# kube-state-metrics Helm Chart + +Installs the [kube-state-metrics agent](https://github.com/kubernetes/kube-state-metrics). + +## Get Repository Info + +```console +helm repo add prometheus-community https://prometheus-community.github.io/helm-charts +helm repo update +``` + +_See [helm repo](https://helm.sh/docs/helm/helm_repo/) for command documentation._ + + +## Install Chart + +```console +helm install [RELEASE_NAME] prometheus-community/kube-state-metrics [flags] +``` + +_See [configuration](#configuration) below._ + +_See [helm install](https://helm.sh/docs/helm/helm_install/) for command documentation._ + +## Uninstall Chart + +```console +helm uninstall [RELEASE_NAME] +``` + +This removes all the Kubernetes components associated with the chart and deletes the release. + +_See [helm uninstall](https://helm.sh/docs/helm/helm_uninstall/) for command documentation._ + +## Upgrading Chart + +```console +helm upgrade [RELEASE_NAME] prometheus-community/kube-state-metrics [flags] +``` + +_See [helm upgrade](https://helm.sh/docs/helm/helm_upgrade/) for command documentation._ + +### Migrating from stable/kube-state-metrics and kubernetes/kube-state-metrics + +You can upgrade in-place: + +1. [get repository info](#get-repository-info) +1. 
[upgrade](#upgrading-chart) your existing release name using the new chart repository
+
+## Upgrading to v3.0.0
+
+v3.0.0 includes kube-state-metrics v2.0, see the [changelog](https://github.com/kubernetes/kube-state-metrics/blob/release-2.0/CHANGELOG.md) for major changes on the application-side.
+
+The upgraded chart now includes the following changes:
+
+* Dropped support for helm v2 (helm v3 or later is required)
+* collectors key was renamed to resources
+* namespace key was renamed to namespaces
+
+## Configuration
+
+See [Customizing the Chart Before Installing](https://helm.sh/docs/intro/using_helm/#customizing-the-chart-before-installing). To see all configurable options with detailed comments:
+
+```console
+helm show values prometheus-community/kube-state-metrics
+```
+
+### kube-rbac-proxy
+
+You can enable `kube-state-metrics` endpoint protection using `kube-rbac-proxy`. By setting `kubeRBACProxy.enabled: true`, this chart will deploy one RBAC proxy container per endpoint (metrics & telemetry).
+To authorize access, authenticate your requests (via a `ServiceAccount` for example) with a `ClusterRole` attached such as:
+
+```yaml
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: kube-state-metrics-read
+rules:
+  - apiGroups: [ "" ]
+    resources: ["services/kube-state-metrics"]
+    verbs:
+      - get
+```
+
+See [kube-rbac-proxy examples](https://github.com/brancz/kube-rbac-proxy/tree/master/examples/resource-attributes) for more details.
diff --git a/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/NOTES.txt b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/NOTES.txt
new file mode 100644
index 0000000..2fd121e
--- /dev/null
+++ b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/NOTES.txt
@@ -0,0 +1,23 @@
+kube-state-metrics is a simple service that listens to the Kubernetes API server and generates metrics about the state of the objects.
+The exposed metrics can be found here:
+https://github.com/kubernetes/kube-state-metrics/blob/master/docs/README.md#exposed-metrics
+
+The metrics are exported on the HTTP endpoint /metrics on the listening port.
+In your case, {{ template "kube-state-metrics.fullname" . }}.{{ template "kube-state-metrics.namespace" . }}.svc.cluster.local:{{ .Values.service.port }}/metrics
+
+They are served either as plaintext or protobuf depending on the Accept header.
+They are designed to be consumed either by Prometheus itself or by a scraper that is compatible with scraping a Prometheus client endpoint.
+
+{{- if .Values.kubeRBACProxy.enabled}}
+
+kube-rbac-proxy endpoint protection is enabled:
+- Metrics endpoints are now HTTPS
+- Ensure that the client authenticates the requests (e.g. via service account) with the following role permissions:
+```
+rules:
+  - apiGroups: [ "" ]
+    resources: ["services/{{ template "kube-state-metrics.fullname" . }}"]
+    verbs:
+      - get
+```
+{{- end }}
diff --git a/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/_helpers.tpl b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/_helpers.tpl
new file mode 100644
index 0000000..4cb47db
--- /dev/null
+++ b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/_helpers.tpl
@@ -0,0 +1,156 @@
+{{/* vim: set filetype=mustache: */}}
+{{/*
+Expand the name of the chart.
+*/}} +{{- define "kube-state-metrics.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "kube-state-metrics.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Create the name of the service account to use +*/}} +{{- define "kube-state-metrics.serviceAccountName" -}} +{{- if .Values.serviceAccount.create -}} + {{ default (include "kube-state-metrics.fullname" .) .Values.serviceAccount.name }} +{{- else -}} + {{ default "default" .Values.serviceAccount.name }} +{{- end -}} +{{- end -}} + +{{/* +Allow the release namespace to be overridden for multi-namespace deployments in combined charts +*/}} +{{- define "kube-state-metrics.namespace" -}} + {{- if .Values.namespaceOverride -}} + {{- .Values.namespaceOverride -}} + {{- else -}} + {{- .Release.Namespace -}} + {{- end -}} +{{- end -}} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "kube-state-metrics.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Generate basic labels +*/}} +{{- define "kube-state-metrics.labels" }} +helm.sh/chart: {{ template "kube-state-metrics.chart" . }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +app.kubernetes.io/component: metrics +app.kubernetes.io/part-of: {{ template "kube-state-metrics.name" . }} +{{- include "kube-state-metrics.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +{{- if .Values.customLabels }} +{{ toYaml .Values.customLabels }} +{{- end }} +{{- if .Values.releaseLabel }} +release: {{ .Release.Name }} +{{- end }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "kube-state-metrics.selectorLabels" }} +{{- if .Values.selectorOverride }} +{{ toYaml .Values.selectorOverride }} +{{- else }} +app.kubernetes.io/name: {{ include "kube-state-metrics.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} +{{- end }} + +{{/* Sets default scrape limits for servicemonitor */}} +{{- define "servicemonitor.scrapeLimits" -}} +{{- with .sampleLimit }} +sampleLimit: {{ . }} +{{- end }} +{{- with .targetLimit }} +targetLimit: {{ . }} +{{- end }} +{{- with .labelLimit }} +labelLimit: {{ . }} +{{- end }} +{{- with .labelNameLengthLimit }} +labelNameLengthLimit: {{ . }} +{{- end }} +{{- with .labelValueLengthLimit }} +labelValueLengthLimit: {{ . }} +{{- end }} +{{- end -}} + +{{/* +Formats imagePullSecrets. Input is (dict "Values" .Values "imagePullSecrets" .{specific imagePullSecrets}) +*/}} +{{- define "kube-state-metrics.imagePullSecrets" -}} +{{- range (concat .Values.global.imagePullSecrets .imagePullSecrets) }} + {{- if eq (typeOf .) "map[string]interface {}" }} +- {{ toYaml . | trim }} + {{- else }} +- name: {{ . 
}} + {{- end }} +{{- end }} +{{- end -}} + +{{/* +The image to use for kube-state-metrics +*/}} +{{- define "kube-state-metrics.image" -}} +{{- if .Values.image.sha }} +{{- if .Values.global.imageRegistry }} +{{- printf "%s/%s:%s@%s" .Values.global.imageRegistry .Values.image.repository (default (printf "v%s" .Chart.AppVersion) .Values.image.tag) .Values.image.sha }} +{{- else }} +{{- printf "%s/%s:%s@%s" .Values.image.registry .Values.image.repository (default (printf "v%s" .Chart.AppVersion) .Values.image.tag) .Values.image.sha }} +{{- end }} +{{- else }} +{{- if .Values.global.imageRegistry }} +{{- printf "%s/%s:%s" .Values.global.imageRegistry .Values.image.repository (default (printf "v%s" .Chart.AppVersion) .Values.image.tag) }} +{{- else }} +{{- printf "%s/%s:%s" .Values.image.registry .Values.image.repository (default (printf "v%s" .Chart.AppVersion) .Values.image.tag) }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +The image to use for kubeRBACProxy +*/}} +{{- define "kubeRBACProxy.image" -}} +{{- if .Values.kubeRBACProxy.image.sha }} +{{- if .Values.global.imageRegistry }} +{{- printf "%s/%s:%s@%s" .Values.global.imageRegistry .Values.kubeRBACProxy.image.repository (default (printf "v%s" .Chart.AppVersion) .Values.kubeRBACProxy.image.tag) .Values.kubeRBACProxy.image.sha }} +{{- else }} +{{- printf "%s/%s:%s@%s" .Values.kubeRBACProxy.image.registry .Values.kubeRBACProxy.image.repository (default (printf "v%s" .Chart.AppVersion) .Values.kubeRBACProxy.image.tag) .Values.kubeRBACProxy.image.sha }} +{{- end }} +{{- else }} +{{- if .Values.global.imageRegistry }} +{{- printf "%s/%s:%s" .Values.global.imageRegistry .Values.kubeRBACProxy.image.repository (default (printf "v%s" .Chart.AppVersion) .Values.kubeRBACProxy.image.tag) }} +{{- else }} +{{- printf "%s/%s:%s" .Values.kubeRBACProxy.image.registry .Values.kubeRBACProxy.image.repository (default (printf "v%s" .Chart.AppVersion) .Values.kubeRBACProxy.image.tag) }} +{{- end }} +{{- end }} +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/ciliumnetworkpolicy.yaml b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/ciliumnetworkpolicy.yaml new file mode 100644 index 0000000..35adcf4 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/ciliumnetworkpolicy.yaml @@ -0,0 +1,33 @@ +{{- if and .Values.networkPolicy.enabled (eq .Values.networkPolicy.flavor "cilium") }} +apiVersion: cilium.io/v2 +kind: CiliumNetworkPolicy +metadata: + {{- if .Values.annotations }} + annotations: + {{ toYaml .Values.annotations | nindent 4 }} + {{- end }} + labels: + {{- include "kube-state-metrics.labels" . | indent 4 }} + name: {{ template "kube-state-metrics.fullname" . }} + namespace: {{ template "kube-state-metrics.namespace" . }} +spec: + endpointSelector: + matchLabels: + {{- include "kube-state-metrics.selectorLabels" . 
| indent 6 }} + egress: + {{- if and .Values.networkPolicy.cilium .Values.networkPolicy.cilium.kubeApiServerSelector }} + {{ toYaml .Values.networkPolicy.cilium.kubeApiServerSelector | nindent 6 }} + {{- else }} + - toEntities: + - kube-apiserver + {{- end }} + ingress: + - toPorts: + - ports: + - port: {{ .Values.service.port | quote }} + protocol: TCP + {{- if .Values.selfMonitor.enabled }} + - port: {{ .Values.selfMonitor.telemetryPort | default 8081 | quote }} + protocol: TCP + {{ end }} +{{ end }} diff --git a/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/clusterrolebinding.yaml b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/clusterrolebinding.yaml new file mode 100644 index 0000000..fa89bed --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/clusterrolebinding.yaml @@ -0,0 +1,20 @@ +{{- if and .Values.rbac.create .Values.rbac.useClusterRole -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + {{- include "kube-state-metrics.labels" . | indent 4 }} + name: {{ template "kube-state-metrics.fullname" . }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole +{{- if .Values.rbac.useExistingRole }} + name: {{ .Values.rbac.useExistingRole }} +{{- else }} + name: {{ template "kube-state-metrics.fullname" . }} +{{- end }} +subjects: +- kind: ServiceAccount + name: {{ template "kube-state-metrics.serviceAccountName" . }} + namespace: {{ template "kube-state-metrics.namespace" . }} +{{- end -}} diff --git a/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/crs-configmap.yaml b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/crs-configmap.yaml new file mode 100644 index 0000000..da1757d --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/crs-configmap.yaml @@ -0,0 +1,16 @@ +{{- if .Values.customResourceState.enabled}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "kube-state-metrics.fullname" . }}-customresourcestate-config + namespace: {{ template "kube-state-metrics.namespace" . }} + labels: + {{- include "kube-state-metrics.labels" . | indent 4 }} + {{- if .Values.annotations }} + annotations: + {{ toYaml .Values.annotations | nindent 4 }} + {{- end }} +data: + config.yaml: | + {{- toYaml .Values.customResourceState.config | nindent 4 }} +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/deployment.yaml b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/deployment.yaml new file mode 100644 index 0000000..5dde3b7 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/deployment.yaml @@ -0,0 +1,290 @@ +apiVersion: apps/v1 +{{- if .Values.autosharding.enabled }} +kind: StatefulSet +{{- else }} +kind: Deployment +{{- end }} +metadata: + name: {{ template "kube-state-metrics.fullname" . }} + namespace: {{ template "kube-state-metrics.namespace" . }} + labels: + {{- include "kube-state-metrics.labels" . | indent 4 }} + {{- if .Values.annotations }} + annotations: +{{ toYaml .Values.annotations | indent 4 }} + {{- end }} +spec: + selector: + matchLabels: + {{- include "kube-state-metrics.selectorLabels" . 
| indent 6 }} + replicas: {{ .Values.replicas }} + {{- if not .Values.autosharding.enabled }} + strategy: + type: {{ .Values.updateStrategy | default "RollingUpdate" }} + {{- end }} + revisionHistoryLimit: {{ .Values.revisionHistoryLimit }} + {{- if .Values.autosharding.enabled }} + serviceName: {{ template "kube-state-metrics.fullname" . }} + volumeClaimTemplates: [] + {{- end }} + template: + metadata: + labels: + {{- include "kube-state-metrics.labels" . | indent 8 }} + {{- if .Values.podAnnotations }} + annotations: +{{ toYaml .Values.podAnnotations | indent 8 }} + {{- end }} + spec: + hostNetwork: {{ .Values.hostNetwork }} + serviceAccountName: {{ template "kube-state-metrics.serviceAccountName" . }} + {{- if .Values.securityContext.enabled }} + securityContext: {{- omit .Values.securityContext "enabled" | toYaml | nindent 8 }} + {{- end }} + {{- if .Values.priorityClassName }} + priorityClassName: {{ .Values.priorityClassName }} + {{- end }} + {{- with .Values.initContainers }} + initContainers: + {{- toYaml . | nindent 6 }} + {{- end }} + containers: + {{- $httpPort := ternary 9090 (.Values.service.port | default 8080) .Values.kubeRBACProxy.enabled}} + {{- $telemetryPort := ternary 9091 (.Values.selfMonitor.telemetryPort | default 8081) .Values.kubeRBACProxy.enabled}} + - name: {{ template "kube-state-metrics.name" . }} + {{- if .Values.autosharding.enabled }} + env: + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + {{- end }} + args: + {{- if .Values.extraArgs }} + {{- .Values.extraArgs | toYaml | nindent 8 }} + {{- end }} + - --port={{ $httpPort }} + {{- if .Values.collectors }} + - --resources={{ .Values.collectors | join "," }} + {{- end }} + {{- if .Values.metricLabelsAllowlist }} + - --metric-labels-allowlist={{ .Values.metricLabelsAllowlist | join "," }} + {{- end }} + {{- if .Values.metricAnnotationsAllowList }} + - --metric-annotations-allowlist={{ .Values.metricAnnotationsAllowList | join "," }} + {{- end }} + {{- if .Values.metricAllowlist }} + - --metric-allowlist={{ .Values.metricAllowlist | join "," }} + {{- end }} + {{- if .Values.metricDenylist }} + - --metric-denylist={{ .Values.metricDenylist | join "," }} + {{- end }} + {{- $namespaces := list }} + {{- if .Values.namespaces }} + {{- range $ns := join "," .Values.namespaces | split "," }} + {{- $namespaces = append $namespaces (tpl $ns $) }} + {{- end }} + {{- end }} + {{- if .Values.releaseNamespace }} + {{- $namespaces = append $namespaces ( include "kube-state-metrics.namespace" . 
) }} + {{- end }} + {{- if $namespaces }} + - --namespaces={{ $namespaces | mustUniq | join "," }} + {{- end }} + {{- if .Values.namespacesDenylist }} + - --namespaces-denylist={{ tpl (.Values.namespacesDenylist | join ",") $ }} + {{- end }} + {{- if .Values.autosharding.enabled }} + - --pod=$(POD_NAME) + - --pod-namespace=$(POD_NAMESPACE) + {{- end }} + {{- if .Values.kubeconfig.enabled }} + - --kubeconfig=/opt/k8s/.kube/config + {{- end }} + {{- if .Values.kubeRBACProxy.enabled }} + - --telemetry-host=127.0.0.1 + - --telemetry-port={{ $telemetryPort }} + {{- else }} + {{- if .Values.selfMonitor.telemetryHost }} + - --telemetry-host={{ .Values.selfMonitor.telemetryHost }} + {{- end }} + {{- if .Values.selfMonitor.telemetryPort }} + - --telemetry-port={{ $telemetryPort }} + {{- end }} + {{- if .Values.customResourceState.enabled }} + - --custom-resource-state-config-file=/etc/customresourcestate/config.yaml + {{- end }} + {{- end }} + {{- if or (.Values.kubeconfig.enabled) (.Values.customResourceState.enabled) (.Values.volumeMounts) }} + volumeMounts: + {{- if .Values.kubeconfig.enabled }} + - name: kubeconfig + mountPath: /opt/k8s/.kube/ + readOnly: true + {{- end }} + {{- if .Values.customResourceState.enabled }} + - name: customresourcestate-config + mountPath: /etc/customresourcestate + readOnly: true + {{- end }} + {{- if .Values.volumeMounts }} +{{ toYaml .Values.volumeMounts | indent 8 }} + {{- end }} + {{- end }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + image: {{ include "kube-state-metrics.image" . }} + {{- if eq .Values.kubeRBACProxy.enabled false }} + ports: + - containerPort: {{ .Values.service.port | default 8080}} + name: "http" + {{- if .Values.selfMonitor.enabled }} + - containerPort: {{ $telemetryPort }} + name: "metrics" + {{- end }} + {{- end }} + livenessProbe: + httpGet: + path: /healthz + port: {{ $httpPort }} + initialDelaySeconds: 5 + timeoutSeconds: 5 + readinessProbe: + httpGet: + path: / + port: {{ $httpPort }} + initialDelaySeconds: 5 + timeoutSeconds: 5 + {{- if .Values.resources }} + resources: +{{ toYaml .Values.resources | indent 10 }} +{{- end }} +{{- if .Values.containerSecurityContext }} + securityContext: +{{ toYaml .Values.containerSecurityContext | indent 10 }} +{{- end }} + {{- if .Values.kubeRBACProxy.enabled }} + - name: kube-rbac-proxy-http + args: + {{- if .Values.kubeRBACProxy.extraArgs }} + {{- .Values.kubeRBACProxy.extraArgs | toYaml | nindent 8 }} + {{- end }} + - --secure-listen-address=:{{ .Values.service.port | default 8080}} + - --upstream=http://127.0.0.1:{{ $httpPort }}/ + - --proxy-endpoints-port=8888 + - --config-file=/etc/kube-rbac-proxy-config/config-file.yaml + volumeMounts: + - name: kube-rbac-proxy-config + mountPath: /etc/kube-rbac-proxy-config + {{- with .Values.kubeRBACProxy.volumeMounts }} + {{- toYaml . | nindent 10 }} + {{- end }} + imagePullPolicy: {{ .Values.kubeRBACProxy.image.pullPolicy }} + image: {{ include "kubeRBACProxy.image" . 
}} + ports: + - containerPort: {{ .Values.service.port | default 8080}} + name: "http" + - containerPort: 8888 + name: "http-healthz" + readinessProbe: + httpGet: + scheme: HTTPS + port: 8888 + path: healthz + initialDelaySeconds: 5 + timeoutSeconds: 5 + {{- if .Values.kubeRBACProxy.resources }} + resources: +{{ toYaml .Values.kubeRBACProxy.resources | indent 10 }} +{{- end }} +{{- if .Values.kubeRBACProxy.containerSecurityContext }} + securityContext: +{{ toYaml .Values.kubeRBACProxy.containerSecurityContext | indent 10 }} +{{- end }} + {{- if .Values.selfMonitor.enabled }} + - name: kube-rbac-proxy-telemetry + args: + {{- if .Values.kubeRBACProxy.extraArgs }} + {{- .Values.kubeRBACProxy.extraArgs | toYaml | nindent 8 }} + {{- end }} + - --secure-listen-address=:{{ .Values.selfMonitor.telemetryPort | default 8081 }} + - --upstream=http://127.0.0.1:{{ $telemetryPort }}/ + - --proxy-endpoints-port=8889 + - --config-file=/etc/kube-rbac-proxy-config/config-file.yaml + volumeMounts: + - name: kube-rbac-proxy-config + mountPath: /etc/kube-rbac-proxy-config + {{- with .Values.kubeRBACProxy.volumeMounts }} + {{- toYaml . | nindent 10 }} + {{- end }} + imagePullPolicy: {{ .Values.kubeRBACProxy.image.pullPolicy }} + image: {{ include "kubeRBACProxy.image" . }} + ports: + - containerPort: {{ .Values.selfMonitor.telemetryPort | default 8081 }} + name: "metrics" + - containerPort: 8889 + name: "metrics-healthz" + readinessProbe: + httpGet: + scheme: HTTPS + port: 8889 + path: healthz + initialDelaySeconds: 5 + timeoutSeconds: 5 + {{- if .Values.kubeRBACProxy.resources }} + resources: +{{ toYaml .Values.kubeRBACProxy.resources | indent 10 }} +{{- end }} +{{- if .Values.kubeRBACProxy.containerSecurityContext }} + securityContext: +{{ toYaml .Values.kubeRBACProxy.containerSecurityContext | indent 10 }} +{{- end }} + {{- end }} + {{- end }} + {{- with .Values.containers }} + {{- toYaml . | nindent 6 }} + {{- end }} +{{- if or .Values.imagePullSecrets .Values.global.imagePullSecrets }} + imagePullSecrets: + {{- include "kube-state-metrics.imagePullSecrets" (dict "Values" .Values "imagePullSecrets" .Values.imagePullSecrets) | indent 8 }} + {{- end }} + {{- if .Values.affinity }} + affinity: +{{ toYaml .Values.affinity | indent 8 }} + {{- end }} + {{- if .Values.nodeSelector }} + nodeSelector: +{{ toYaml .Values.nodeSelector | indent 8 }} + {{- end }} + {{- if .Values.tolerations }} + tolerations: +{{ toYaml .Values.tolerations | indent 8 }} + {{- end }} + {{- if .Values.topologySpreadConstraints }} + topologySpreadConstraints: +{{ toYaml .Values.topologySpreadConstraints | indent 8 }} + {{- end }} + {{- if or (.Values.kubeconfig.enabled) (.Values.customResourceState.enabled) (.Values.volumes) (.Values.kubeRBACProxy.enabled) }} + volumes: + {{- if .Values.kubeconfig.enabled}} + - name: kubeconfig + secret: + secretName: {{ template "kube-state-metrics.fullname" . }}-kubeconfig + {{- end }} + {{- if .Values.kubeRBACProxy.enabled}} + - name: kube-rbac-proxy-config + configMap: + name: {{ template "kube-state-metrics.fullname" . }}-rbac-config + {{- end }} + {{- if .Values.customResourceState.enabled}} + - name: customresourcestate-config + configMap: + name: {{ template "kube-state-metrics.fullname" . 
}}-customresourcestate-config + {{- end }} + {{- if .Values.volumes }} +{{ toYaml .Values.volumes | indent 8 }} + {{- end }} + {{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/extra-manifests.yaml b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/extra-manifests.yaml new file mode 100644 index 0000000..f0c859e --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/extra-manifests.yaml @@ -0,0 +1,4 @@ +{{ range .Values.extraManifests }} +--- +{{ tpl (toYaml .) $ }} +{{ end }} diff --git a/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/kubeconfig-secret.yaml b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/kubeconfig-secret.yaml new file mode 100644 index 0000000..c0a3e45 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/kubeconfig-secret.yaml @@ -0,0 +1,12 @@ +{{- if .Values.kubeconfig.enabled -}} +apiVersion: v1 +kind: Secret +metadata: + name: {{ template "kube-state-metrics.fullname" . }}-kubeconfig + namespace: {{ template "kube-state-metrics.namespace" . }} + labels: + {{- include "kube-state-metrics.labels" . | indent 4 }} +type: Opaque +data: + config: '{{ .Values.kubeconfig.secret }}' +{{- end -}} diff --git a/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/networkpolicy.yaml b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/networkpolicy.yaml new file mode 100644 index 0000000..ea937c2 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/networkpolicy.yaml @@ -0,0 +1,43 @@ +{{- if and .Values.networkPolicy.enabled (eq .Values.networkPolicy.flavor "kubernetes") }} +kind: NetworkPolicy +apiVersion: networking.k8s.io/v1 +metadata: + {{- if .Values.annotations }} + annotations: + {{ toYaml .Values.annotations | nindent 4 }} + {{- end }} + labels: + {{- include "kube-state-metrics.labels" . | indent 4 }} + name: {{ template "kube-state-metrics.fullname" . }} + namespace: {{ template "kube-state-metrics.namespace" . }} +spec: + {{- if .Values.networkPolicy.egress }} + ## Deny all egress by default + egress: + {{- toYaml .Values.networkPolicy.egress | nindent 4 }} + {{- end }} + ingress: + {{- if .Values.networkPolicy.ingress }} + {{- toYaml .Values.networkPolicy.ingress | nindent 4 }} + {{- else }} + ## Allow ingress on default ports by default + - ports: + - port: {{ .Values.service.port | default 8080 }} + protocol: TCP + {{- if .Values.selfMonitor.enabled }} + {{- $telemetryPort := ternary 9091 (.Values.selfMonitor.telemetryPort | default 8081) .Values.kubeRBACProxy.enabled}} + - port: {{ $telemetryPort }} + protocol: TCP + {{- end }} + {{- end }} + podSelector: + {{- if .Values.networkPolicy.podSelector }} + {{- toYaml .Values.networkPolicy.podSelector | nindent 4 }} + {{- else }} + matchLabels: + {{- include "kube-state-metrics.selectorLabels" . 
| indent 6 }} + {{- end }} + policyTypes: + - Ingress + - Egress +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/pdb.yaml b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/pdb.yaml new file mode 100644 index 0000000..13d5422 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/pdb.yaml @@ -0,0 +1,18 @@ +{{- if .Values.podDisruptionBudget -}} +{{ if $.Capabilities.APIVersions.Has "policy/v1/PodDisruptionBudget" -}} +apiVersion: policy/v1 +{{- else -}} +apiVersion: policy/v1beta1 +{{- end }} +kind: PodDisruptionBudget +metadata: + name: {{ template "kube-state-metrics.fullname" . }} + namespace: {{ template "kube-state-metrics.namespace" . }} + labels: + {{- include "kube-state-metrics.labels" . | indent 4 }} +spec: + selector: + matchLabels: + app.kubernetes.io/name: {{ template "kube-state-metrics.name" . }} +{{ toYaml .Values.podDisruptionBudget | indent 2 }} +{{- end -}} diff --git a/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/podsecuritypolicy.yaml b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/podsecuritypolicy.yaml new file mode 100644 index 0000000..20a9cbe --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/podsecuritypolicy.yaml @@ -0,0 +1,39 @@ +{{- if and .Values.podSecurityPolicy.enabled (.Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy") }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "kube-state-metrics.fullname" . }} + labels: + {{- include "kube-state-metrics.labels" . | indent 4 }} +{{- if .Values.podSecurityPolicy.annotations }} + annotations: +{{ toYaml .Values.podSecurityPolicy.annotations | indent 4 }} +{{- end }} +spec: + privileged: false + volumes: + - 'secret' +{{- if .Values.podSecurityPolicy.additionalVolumes }} +{{ toYaml .Values.podSecurityPolicy.additionalVolumes | indent 4 }} +{{- end }} + hostNetwork: false + hostIPC: false + hostPID: false + runAsUser: + rule: 'MustRunAsNonRoot' + seLinux: + rule: 'RunAsAny' + supplementalGroups: + rule: 'MustRunAs' + ranges: + # Forbid adding the root group. + - min: 1 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + # Forbid adding the root group. + - min: 1 + max: 65535 + readOnlyRootFilesystem: false +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/psp-clusterrole.yaml b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/psp-clusterrole.yaml new file mode 100644 index 0000000..e003c1f --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/psp-clusterrole.yaml @@ -0,0 +1,19 @@ +{{- if and .Values.podSecurityPolicy.enabled (.Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy") }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + {{- include "kube-state-metrics.labels" . | indent 4 }} + name: psp-{{ template "kube-state-metrics.fullname" . }} +rules: +{{- $kubeTargetVersion := default .Capabilities.KubeVersion.GitVersion .Values.kubeTargetVersionOverride }} +{{- if semverCompare "> 1.15.0-0" $kubeTargetVersion }} +- apiGroups: ['policy'] +{{- else }} +- apiGroups: ['extensions'] +{{- end }} + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ template "kube-state-metrics.fullname" . 
}} +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/psp-clusterrolebinding.yaml b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/psp-clusterrolebinding.yaml new file mode 100644 index 0000000..8783748 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/psp-clusterrolebinding.yaml @@ -0,0 +1,16 @@ +{{- if and .Values.podSecurityPolicy.enabled (.Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy") }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + {{- include "kube-state-metrics.labels" . | indent 4 }} + name: psp-{{ template "kube-state-metrics.fullname" . }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: psp-{{ template "kube-state-metrics.fullname" . }} +subjects: + - kind: ServiceAccount + name: {{ template "kube-state-metrics.serviceAccountName" . }} + namespace: {{ template "kube-state-metrics.namespace" . }} +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/rbac-configmap.yaml b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/rbac-configmap.yaml new file mode 100644 index 0000000..1489768 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/rbac-configmap.yaml @@ -0,0 +1,22 @@ +{{- if .Values.kubeRBACProxy.enabled}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "kube-state-metrics.fullname" . }}-rbac-config + namespace: {{ template "kube-state-metrics.namespace" . }} + labels: + {{- include "kube-state-metrics.labels" . | indent 4 }} + {{- if .Values.annotations }} + annotations: + {{ toYaml .Values.annotations | nindent 4 }} + {{- end }} +data: + config-file.yaml: |+ + authorization: + resourceAttributes: + namespace: {{ template "kube-state-metrics.namespace" . }} + apiVersion: v1 + resource: services + subresource: {{ template "kube-state-metrics.fullname" . }} + name: {{ template "kube-state-metrics.fullname" . }} +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/role.yaml b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/role.yaml new file mode 100644 index 0000000..5b3aaf1 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/role.yaml @@ -0,0 +1,212 @@ +{{- if and (eq .Values.rbac.create true) (not .Values.rbac.useExistingRole) -}} +{{- range (ternary (join "," .Values.namespaces | split "," ) (list "") (eq $.Values.rbac.useClusterRole false)) }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +{{- if eq $.Values.rbac.useClusterRole false }} +kind: Role +{{- else }} +kind: ClusterRole +{{- end }} +metadata: + labels: + {{- include "kube-state-metrics.labels" $ | indent 4 }} + name: {{ template "kube-state-metrics.fullname" $ }} +{{- if eq $.Values.rbac.useClusterRole false }} + namespace: {{ . 
}} +{{- end }} +rules: +{{ if has "certificatesigningrequests" $.Values.collectors }} +- apiGroups: ["certificates.k8s.io"] + resources: + - certificatesigningrequests + verbs: ["list", "watch"] +{{ end -}} +{{ if has "configmaps" $.Values.collectors }} +- apiGroups: [""] + resources: + - configmaps + verbs: ["list", "watch"] +{{ end -}} +{{ if has "cronjobs" $.Values.collectors }} +- apiGroups: ["batch"] + resources: + - cronjobs + verbs: ["list", "watch"] +{{ end -}} +{{ if has "daemonsets" $.Values.collectors }} +- apiGroups: ["extensions", "apps"] + resources: + - daemonsets + verbs: ["list", "watch"] +{{ end -}} +{{ if has "deployments" $.Values.collectors }} +- apiGroups: ["extensions", "apps"] + resources: + - deployments + verbs: ["list", "watch"] +{{ end -}} +{{ if has "endpoints" $.Values.collectors }} +- apiGroups: [""] + resources: + - endpoints + verbs: ["list", "watch"] +{{ end -}} +{{ if has "endpointslices" $.Values.collectors }} +- apiGroups: ["discovery.k8s.io"] + resources: + - endpointslices + verbs: ["list", "watch"] +{{ end -}} +{{ if has "horizontalpodautoscalers" $.Values.collectors }} +- apiGroups: ["autoscaling"] + resources: + - horizontalpodautoscalers + verbs: ["list", "watch"] +{{ end -}} +{{ if has "ingresses" $.Values.collectors }} +- apiGroups: ["extensions", "networking.k8s.io"] + resources: + - ingresses + verbs: ["list", "watch"] +{{ end -}} +{{ if has "jobs" $.Values.collectors }} +- apiGroups: ["batch"] + resources: + - jobs + verbs: ["list", "watch"] +{{ end -}} +{{ if has "leases" $.Values.collectors }} +- apiGroups: ["coordination.k8s.io"] + resources: + - leases + verbs: ["list", "watch"] +{{ end -}} +{{ if has "limitranges" $.Values.collectors }} +- apiGroups: [""] + resources: + - limitranges + verbs: ["list", "watch"] +{{ end -}} +{{ if has "mutatingwebhookconfigurations" $.Values.collectors }} +- apiGroups: ["admissionregistration.k8s.io"] + resources: + - mutatingwebhookconfigurations + verbs: ["list", "watch"] +{{ end -}} +{{ if has "namespaces" $.Values.collectors }} +- apiGroups: [""] + resources: + - namespaces + verbs: ["list", "watch"] +{{ end -}} +{{ if has "networkpolicies" $.Values.collectors }} +- apiGroups: ["networking.k8s.io"] + resources: + - networkpolicies + verbs: ["list", "watch"] +{{ end -}} +{{ if has "nodes" $.Values.collectors }} +- apiGroups: [""] + resources: + - nodes + verbs: ["list", "watch"] +{{ end -}} +{{ if has "persistentvolumeclaims" $.Values.collectors }} +- apiGroups: [""] + resources: + - persistentvolumeclaims + verbs: ["list", "watch"] +{{ end -}} +{{ if has "persistentvolumes" $.Values.collectors }} +- apiGroups: [""] + resources: + - persistentvolumes + verbs: ["list", "watch"] +{{ end -}} +{{ if has "poddisruptionbudgets" $.Values.collectors }} +- apiGroups: ["policy"] + resources: + - poddisruptionbudgets + verbs: ["list", "watch"] +{{ end -}} +{{ if has "pods" $.Values.collectors }} +- apiGroups: [""] + resources: + - pods + verbs: ["list", "watch"] +{{ end -}} +{{ if has "replicasets" $.Values.collectors }} +- apiGroups: ["extensions", "apps"] + resources: + - replicasets + verbs: ["list", "watch"] +{{ end -}} +{{ if has "replicationcontrollers" $.Values.collectors }} +- apiGroups: [""] + resources: + - replicationcontrollers + verbs: ["list", "watch"] +{{ end -}} +{{ if has "resourcequotas" $.Values.collectors }} +- apiGroups: [""] + resources: + - resourcequotas + verbs: ["list", "watch"] +{{ end -}} +{{ if has "secrets" $.Values.collectors }} +- apiGroups: [""] + resources: + - secrets + verbs: 
["list", "watch"] +{{ end -}} +{{ if has "services" $.Values.collectors }} +- apiGroups: [""] + resources: + - services + verbs: ["list", "watch"] +{{ end -}} +{{ if has "statefulsets" $.Values.collectors }} +- apiGroups: ["apps"] + resources: + - statefulsets + verbs: ["list", "watch"] +{{ end -}} +{{ if has "storageclasses" $.Values.collectors }} +- apiGroups: ["storage.k8s.io"] + resources: + - storageclasses + verbs: ["list", "watch"] +{{ end -}} +{{ if has "validatingwebhookconfigurations" $.Values.collectors }} +- apiGroups: ["admissionregistration.k8s.io"] + resources: + - validatingwebhookconfigurations + verbs: ["list", "watch"] +{{ end -}} +{{ if has "volumeattachments" $.Values.collectors }} +- apiGroups: ["storage.k8s.io"] + resources: + - volumeattachments + verbs: ["list", "watch"] +{{ end -}} +{{- if $.Values.kubeRBACProxy.enabled }} +- apiGroups: ["authentication.k8s.io"] + resources: + - tokenreviews + verbs: ["create"] +- apiGroups: ["authorization.k8s.io"] + resources: + - subjectaccessreviews + verbs: ["create"] +{{- end }} +{{- if $.Values.customResourceState.enabled }} +- apiGroups: ["apiextensions.k8s.io"] + resources: + - customresourcedefinitions + verbs: ["list", "watch"] +{{- end }} +{{ if $.Values.rbac.extraRules }} +{{ toYaml $.Values.rbac.extraRules }} +{{ end }} +{{- end -}} +{{- end -}} diff --git a/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/rolebinding.yaml b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/rolebinding.yaml new file mode 100644 index 0000000..c3d1d22 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/rolebinding.yaml @@ -0,0 +1,24 @@ +{{- if and (eq .Values.rbac.create true) (eq .Values.rbac.useClusterRole false) -}} +{{- range (join "," $.Values.namespaces) | split "," }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + labels: + {{- include "kube-state-metrics.labels" $ | indent 4 }} + name: {{ template "kube-state-metrics.fullname" $ }} + namespace: {{ . }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role +{{- if (not $.Values.rbac.useExistingRole) }} + name: {{ template "kube-state-metrics.fullname" $ }} +{{- else }} + name: {{ $.Values.rbac.useExistingRole }} +{{- end }} +subjects: +- kind: ServiceAccount + name: {{ template "kube-state-metrics.serviceAccountName" $ }} + namespace: {{ template "kube-state-metrics.namespace" $ }} +{{- end -}} +{{- end -}} diff --git a/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/service.yaml b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/service.yaml new file mode 100644 index 0000000..c0645d9 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/service.yaml @@ -0,0 +1,49 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ template "kube-state-metrics.fullname" . }} + namespace: {{ template "kube-state-metrics.namespace" . }} + labels: + {{- include "kube-state-metrics.labels" . 
| indent 4 }} + annotations: + {{- if .Values.prometheusScrape }} + prometheus.io/scrape: '{{ .Values.prometheusScrape }}' + {{- end }} + {{- if .Values.service.annotations }} + {{- toYaml .Values.service.annotations | nindent 4 }} + {{- end }} +spec: + type: "{{ .Values.service.type }}" + ports: + - name: "http" + protocol: TCP + port: {{ .Values.service.port | default 8080}} + {{- if .Values.service.nodePort }} + nodePort: {{ .Values.service.nodePort }} + {{- end }} + targetPort: {{ .Values.service.port | default 8080}} + {{ if .Values.selfMonitor.enabled }} + - name: "metrics" + protocol: TCP + port: {{ .Values.selfMonitor.telemetryPort | default 8081 }} + targetPort: {{ .Values.selfMonitor.telemetryPort | default 8081 }} + {{- if .Values.selfMonitor.telemetryNodePort }} + nodePort: {{ .Values.selfMonitor.telemetryNodePort }} + {{- end }} + {{ end }} +{{- if .Values.service.loadBalancerIP }} + loadBalancerIP: "{{ .Values.service.loadBalancerIP }}" +{{- end }} +{{- if .Values.service.loadBalancerSourceRanges }} + loadBalancerSourceRanges: + {{- range $cidr := .Values.service.loadBalancerSourceRanges }} + - {{ $cidr }} + {{- end }} +{{- end }} +{{- if .Values.autosharding.enabled }} + clusterIP: None +{{- else if .Values.service.clusterIP }} + clusterIP: "{{ .Values.service.clusterIP }}" +{{- end }} + selector: + {{- include "kube-state-metrics.selectorLabels" . | indent 4 }} diff --git a/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/serviceaccount.yaml b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/serviceaccount.yaml new file mode 100644 index 0000000..4775530 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/serviceaccount.yaml @@ -0,0 +1,15 @@ +{{- if .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + {{- include "kube-state-metrics.labels" . | indent 4 }} + name: {{ template "kube-state-metrics.serviceAccountName" . }} + namespace: {{ template "kube-state-metrics.namespace" . }} +{{- if .Values.serviceAccount.annotations }} + annotations: +{{ toYaml .Values.serviceAccount.annotations | indent 4 }} +{{- end }} +imagePullSecrets: + {{- include "kube-state-metrics.imagePullSecrets" (dict "Values" .Values "imagePullSecrets" .Values.serviceAccount.imagePullSecrets) | indent 2 }} +{{- end -}} diff --git a/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/servicemonitor.yaml b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/servicemonitor.yaml new file mode 100644 index 0000000..35975f5 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/servicemonitor.yaml @@ -0,0 +1,120 @@ +{{- if .Values.prometheus.monitor.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "kube-state-metrics.fullname" . }} + namespace: {{ template "kube-state-metrics.namespace" . }} + labels: + {{- include "kube-state-metrics.labels" . | indent 4 }} + {{- with .Values.prometheus.monitor.additionalLabels }} + {{- tpl (toYaml . | nindent 4) $ }} + {{- end }} + {{- with .Values.prometheus.monitor.annotations }} + annotations: + {{- tpl (toYaml . | nindent 4) $ }} + {{- end }} +spec: + jobLabel: {{ default "app.kubernetes.io/name" .Values.prometheus.monitor.jobLabel }} + {{- with .Values.prometheus.monitor.targetLabels }} + targetLabels: + {{- toYaml . 
| trim | nindent 4 }} + {{- end }} + {{- with .Values.prometheus.monitor.podTargetLabels }} + podTargetLabels: + {{- toYaml . | trim | nindent 4 }} + {{- end }} + {{- include "servicemonitor.scrapeLimits" .Values.prometheus.monitor | indent 2 }} + {{- if .Values.prometheus.monitor.namespaceSelector }} + namespaceSelector: + matchNames: + {{- with .Values.prometheus.monitor.namespaceSelector }} + {{- toYaml . | nindent 6 }} + {{- end }} + {{- end }} + selector: + matchLabels: + {{- with .Values.prometheus.monitor.selectorOverride }} + {{- toYaml . | nindent 6 }} + {{- else }} + {{- include "kube-state-metrics.selectorLabels" . | indent 6 }} + {{- end }} + endpoints: + - port: http + {{- if .Values.prometheus.monitor.interval }} + interval: {{ .Values.prometheus.monitor.interval }} + {{- end }} + {{- if .Values.prometheus.monitor.scrapeTimeout }} + scrapeTimeout: {{ .Values.prometheus.monitor.scrapeTimeout }} + {{- end }} + {{- if .Values.prometheus.monitor.proxyUrl }} + proxyUrl: {{ .Values.prometheus.monitor.proxyUrl}} + {{- end }} + {{- if .Values.prometheus.monitor.enableHttp2 }} + enableHttp2: {{ .Values.prometheus.monitor.enableHttp2}} + {{- end }} + {{- if .Values.prometheus.monitor.honorLabels }} + honorLabels: true + {{- end }} + {{- if .Values.prometheus.monitor.metricRelabelings }} + metricRelabelings: + {{- toYaml .Values.prometheus.monitor.metricRelabelings | nindent 8 }} + {{- end }} + {{- if .Values.prometheus.monitor.relabelings }} + relabelings: + {{- toYaml .Values.prometheus.monitor.relabelings | nindent 8 }} + {{- end }} + {{- if .Values.prometheus.monitor.scheme }} + scheme: {{ .Values.prometheus.monitor.scheme }} + {{- end }} + {{- if .Values.prometheus.monitor.tlsConfig }} + tlsConfig: + {{- toYaml .Values.prometheus.monitor.tlsConfig | nindent 8 }} + {{- end }} + {{- if .Values.prometheus.monitor.bearerTokenFile }} + bearerTokenFile: {{ .Values.prometheus.monitor.bearerTokenFile }} + {{- end }} + {{- with .Values.prometheus.monitor.bearerTokenSecret }} + bearerTokenSecret: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.selfMonitor.enabled }} + - port: metrics + {{- if .Values.prometheus.monitor.interval }} + interval: {{ .Values.prometheus.monitor.interval }} + {{- end }} + {{- if .Values.prometheus.monitor.scrapeTimeout }} + scrapeTimeout: {{ .Values.prometheus.monitor.scrapeTimeout }} + {{- end }} + {{- if .Values.prometheus.monitor.proxyUrl }} + proxyUrl: {{ .Values.prometheus.monitor.proxyUrl}} + {{- end }} + {{- if .Values.prometheus.monitor.enableHttp2 }} + enableHttp2: {{ .Values.prometheus.monitor.enableHttp2}} + {{- end }} + {{- if .Values.prometheus.monitor.honorLabels }} + honorLabels: true + {{- end }} + {{- if .Values.prometheus.monitor.metricRelabelings }} + metricRelabelings: + {{- toYaml .Values.prometheus.monitor.metricRelabelings | nindent 8 }} + {{- end }} + {{- if .Values.prometheus.monitor.relabelings }} + relabelings: + {{- toYaml .Values.prometheus.monitor.relabelings | nindent 8 }} + {{- end }} + {{- if .Values.prometheus.monitor.scheme }} + scheme: {{ .Values.prometheus.monitor.scheme }} + {{- end }} + {{- if .Values.prometheus.monitor.tlsConfig }} + tlsConfig: + {{- toYaml .Values.prometheus.monitor.tlsConfig | nindent 8 }} + {{- end }} + {{- if .Values.prometheus.monitor.bearerTokenFile }} + bearerTokenFile: {{ .Values.prometheus.monitor.bearerTokenFile }} + {{- end }} + {{- with .Values.prometheus.monitor.bearerTokenSecret }} + bearerTokenSecret: + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- end }} +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/stsdiscovery-role.yaml b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/stsdiscovery-role.yaml new file mode 100644 index 0000000..5f452ab --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/stsdiscovery-role.yaml @@ -0,0 +1,26 @@ +{{- if and .Values.autosharding.enabled .Values.rbac.create -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: stsdiscovery-{{ template "kube-state-metrics.fullname" . }} + namespace: {{ template "kube-state-metrics.namespace" . }} + labels: + {{- include "kube-state-metrics.labels" . | indent 4 }} +rules: +- apiGroups: + - "" + resources: + - pods + verbs: + - get +- apiGroups: + - apps + resourceNames: + - {{ template "kube-state-metrics.fullname" . }} + resources: + - statefulsets + verbs: + - get + - list + - watch +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/stsdiscovery-rolebinding.yaml b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/stsdiscovery-rolebinding.yaml new file mode 100644 index 0000000..cf29928 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/stsdiscovery-rolebinding.yaml @@ -0,0 +1,17 @@ +{{- if and .Values.autosharding.enabled .Values.rbac.create -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: stsdiscovery-{{ template "kube-state-metrics.fullname" . }} + namespace: {{ template "kube-state-metrics.namespace" . }} + labels: + {{- include "kube-state-metrics.labels" . | indent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: stsdiscovery-{{ template "kube-state-metrics.fullname" . }} +subjects: + - kind: ServiceAccount + name: {{ template "kube-state-metrics.serviceAccountName" . }} + namespace: {{ template "kube-state-metrics.namespace" . }} +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/verticalpodautoscaler.yaml b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/verticalpodautoscaler.yaml new file mode 100644 index 0000000..854185e --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/templates/verticalpodautoscaler.yaml @@ -0,0 +1,44 @@ +{{- if and (.Capabilities.APIVersions.Has "autoscaling.k8s.io/v1") (.Values.verticalPodAutoscaler.enabled) }} +apiVersion: autoscaling.k8s.io/v1 +kind: VerticalPodAutoscaler +metadata: + name: {{ template "kube-state-metrics.fullname" . }} + namespace: {{ template "kube-state-metrics.namespace" . }} + labels: + {{- include "kube-state-metrics.labels" . | indent 4 }} +spec: + {{- with .Values.verticalPodAutoscaler.recommenders }} + recommenders: + {{- toYaml . | nindent 4 }} + {{- end }} + resourcePolicy: + containerPolicies: + - containerName: {{ template "kube-state-metrics.name" . }} + {{- with .Values.verticalPodAutoscaler.controlledResources }} + controlledResources: + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- if .Values.verticalPodAutoscaler.controlledValues }} + controlledValues: {{ .Values.verticalPodAutoscaler.controlledValues }} + {{- end }} + {{- if .Values.verticalPodAutoscaler.maxAllowed }} + maxAllowed: + {{ toYaml .Values.verticalPodAutoscaler.maxAllowed | nindent 8 }} + {{- end }} + {{- if .Values.verticalPodAutoscaler.minAllowed }} + minAllowed: + {{ toYaml .Values.verticalPodAutoscaler.minAllowed | nindent 8 }} + {{- end }} + targetRef: + apiVersion: apps/v1 + {{- if .Values.autosharding.enabled }} + kind: StatefulSet + {{- else }} + kind: Deployment + {{- end }} + name: {{ template "kube-state-metrics.fullname" . }} + {{- with .Values.verticalPodAutoscaler.updatePolicy }} + updatePolicy: + {{- toYaml . | nindent 4 }} + {{- end }} +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/values.yaml b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/values.yaml new file mode 100644 index 0000000..8638049 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/kube-state-metrics/values.yaml @@ -0,0 +1,456 @@ +# Default values for kube-state-metrics. +prometheusScrape: true +image: + registry: registry.k8s.io + repository: kube-state-metrics/kube-state-metrics + # If unset use v + .Charts.appVersion + tag: "v2.9.2" + sha: "" + pullPolicy: IfNotPresent + +imagePullSecrets: [] +# - name: "image-pull-secret" + +global: + # To help compatibility with other charts which use global.imagePullSecrets. + # Allow either an array of {name: pullSecret} maps (k8s-style), or an array of strings (more common helm-style). + # global: + # imagePullSecrets: + # - name: pullSecret1 + # - name: pullSecret2 + # or + # global: + # imagePullSecrets: + # - pullSecret1 + # - pullSecret2 + imagePullSecrets: [] + # + # Allow parent charts to override registry hostname + imageRegistry: "" + +# If set to true, this will deploy kube-state-metrics as a StatefulSet and the data +# will be automatically sharded across <.Values.replicas> pods using the built-in +# autodiscovery feature: https://github.com/kubernetes/kube-state-metrics#automated-sharding +# This is an experimental feature and there are no stability guarantees. +autosharding: + enabled: false + +replicas: 1 + +# Change the deployment strategy when autosharding is disabled +# updateStrategy: Recreate + +# Number of old history to retain to allow rollback +# Default Kubernetes value is set to 10 +revisionHistoryLimit: 10 + +# List of additional cli arguments to configure kube-state-metrics +# for example: --enable-gzip-encoding, --log-file, etc. +# all the possible args can be found here: https://github.com/kubernetes/kube-state-metrics/blob/master/docs/cli-arguments.md +extraArgs: [] + +service: + port: 8080 + # Default to clusterIP for backward compatibility + type: ClusterIP + nodePort: 0 + loadBalancerIP: "" + # Only allow access to the loadBalancerIP from these IPs + loadBalancerSourceRanges: [] + clusterIP: "" + annotations: {} + +## Additional labels to add to all resources +customLabels: {} + # app: kube-state-metrics + +## Override selector labels +selectorOverride: {} + +## set to true to add the release label so scraping of the servicemonitor with kube-prometheus-stack works out of the box +releaseLabel: false + +hostNetwork: false + +rbac: + # If true, create & use RBAC resources + create: true + + # Set to a rolename to use existing role - skipping role creating - but still doing serviceaccount and rolebinding to it, rolename set here. 
+ # useExistingRole: your-existing-role + + # If set to false - Run without Cluteradmin privs needed - ONLY works if namespace is also set (if useExistingRole is set this name is used as ClusterRole or Role to bind to) + useClusterRole: true + + # Add permissions for CustomResources' apiGroups in Role/ClusterRole. Should be used in conjunction with Custom Resource State Metrics configuration + # Example: + # - apiGroups: ["monitoring.coreos.com"] + # resources: ["prometheuses"] + # verbs: ["list", "watch"] + extraRules: [] + +# Configure kube-rbac-proxy. When enabled, creates one kube-rbac-proxy container per exposed HTTP endpoint (metrics and telemetry if enabled). +# The requests are served through the same service but requests are then HTTPS. +kubeRBACProxy: + enabled: false + image: + registry: quay.io + repository: brancz/kube-rbac-proxy + tag: v0.14.0 + sha: "" + pullPolicy: IfNotPresent + + # List of additional cli arguments to configure kube-rbac-prxy + # for example: --tls-cipher-suites, --log-file, etc. + # all the possible args can be found here: https://github.com/brancz/kube-rbac-proxy#usage + extraArgs: [] + + ## Specify security settings for a Container + ## Allows overrides and additional options compared to (Pod) securityContext + ## Ref: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container + containerSecurityContext: {} + + resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. + # limits: + # cpu: 100m + # memory: 64Mi + # requests: + # cpu: 10m + # memory: 32Mi + + ## volumeMounts enables mounting custom volumes in rbac-proxy containers + ## Useful for TLS certificates and keys + volumeMounts: [] + # - mountPath: /etc/tls + # name: kube-rbac-proxy-tls + # readOnly: true + +serviceAccount: + # Specifies whether a ServiceAccount should be created, require rbac true + create: true + # The name of the ServiceAccount to use. + # If not set and create is true, a name is generated using the fullname template + name: + # Reference to one or more secrets to be used when pulling images + # ref: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/ + imagePullSecrets: [] + # ServiceAccount annotations. + # Use case: AWS EKS IAM roles for service accounts + # ref: https://docs.aws.amazon.com/eks/latest/userguide/specify-service-account-role.html + annotations: {} + +prometheus: + monitor: + enabled: false + annotations: {} + additionalLabels: {} + namespace: "" + namespaceSelector: [] + jobLabel: "" + targetLabels: [] + podTargetLabels: [] + interval: "" + ## SampleLimit defines per-scrape limit on number of scraped samples that will be accepted. + ## + sampleLimit: 0 + + ## TargetLimit defines a limit on the number of scraped targets that will be accepted. + ## + targetLimit: 0 + + ## Per-scrape limit on number of labels that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelLimit: 0 + + ## Per-scrape limit on length of labels name that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. 
+ ## + labelNameLengthLimit: 0 + + ## Per-scrape limit on length of labels value that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelValueLengthLimit: 0 + scrapeTimeout: "" + proxyUrl: "" + ## Whether to enable HTTP2 for servicemonitor + # enableHttp2: false + selectorOverride: {} + honorLabels: false + metricRelabelings: [] + relabelings: [] + scheme: "" + ## File to read bearer token for scraping targets + bearerTokenFile: "" + ## Secret to mount to read bearer token for scraping targets. The secret needs + ## to be in the same namespace as the service monitor and accessible by the + ## Prometheus Operator + bearerTokenSecret: {} + # name: secret-name + # key: key-name + tlsConfig: {} + +## Specify if a Pod Security Policy for kube-state-metrics must be created +## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/ +## +podSecurityPolicy: + enabled: false + annotations: {} + ## Specify pod annotations + ## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#apparmor + ## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#seccomp + ## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#sysctl + ## + # seccomp.security.alpha.kubernetes.io/allowedProfileNames: '*' + # seccomp.security.alpha.kubernetes.io/defaultProfileName: 'docker/default' + # apparmor.security.beta.kubernetes.io/defaultProfileName: 'runtime/default' + + additionalVolumes: [] + +## Configure network policy for kube-state-metrics +networkPolicy: + enabled: false + # networkPolicy.flavor -- Flavor of the network policy to use. + # Can be: + # * kubernetes for networking.k8s.io/v1/NetworkPolicy + # * cilium for cilium.io/v2/CiliumNetworkPolicy + flavor: kubernetes + + ## Configure the cilium network policy kube-apiserver selector + # cilium: + # kubeApiServerSelector: + # - toEntities: + # - kube-apiserver + + # egress: + # - {} + # ingress: + # - {} + # podSelector: + # matchLabels: + # app.kubernetes.io/name: kube-state-metrics + +securityContext: + enabled: true + runAsGroup: 65534 + runAsUser: 65534 + fsGroup: 65534 + runAsNonRoot: true + seccompProfile: + type: RuntimeDefault + +## Specify security settings for a Container +## Allows overrides and additional options compared to (Pod) securityContext +## Ref: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container +containerSecurityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + +## Node labels for pod assignment +## Ref: https://kubernetes.io/docs/user-guide/node-selection/ +nodeSelector: {} + +## Affinity settings for pod assignment +## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/ +affinity: {} + +## Tolerations for pod assignment +## Ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/ +tolerations: [] + +## Topology spread constraints for pod assignment +## Ref: https://kubernetes.io/docs/concepts/workloads/pods/pod-topology-spread-constraints/ +topologySpreadConstraints: [] + +# Annotations to be added to the deployment/statefulset +annotations: {} + +# Annotations to be added to the pod +podAnnotations: {} + +## Assign a PriorityClassName to pods if set +# priorityClassName: "" + +# Ref: https://kubernetes.io/docs/tasks/run-application/configure-pdb/ +podDisruptionBudget: {} + +# Comma-separated list of metrics to be exposed. +# This list comprises of exact metric names and/or regex patterns. 
+# The allowlist and denylist are mutually exclusive. +metricAllowlist: [] + +# Comma-separated list of metrics not to be enabled. +# This list comprises of exact metric names and/or regex patterns. +# The allowlist and denylist are mutually exclusive. +metricDenylist: [] + +# Comma-separated list of additional Kubernetes label keys that will be used in the resource's +# labels metric. By default the metric contains only name and namespace labels. +# To include additional labels, provide a list of resource names in their plural form and Kubernetes +# label keys you would like to allow for them (Example: '=namespaces=[k8s-label-1,k8s-label-n,...],pods=[app],...)'. +# A single '*' can be provided per resource instead to allow any labels, but that has +# severe performance implications (Example: '=pods=[*]'). +metricLabelsAllowlist: [] + # - namespaces=[k8s-label-1,k8s-label-n] + +# Comma-separated list of Kubernetes annotations keys that will be used in the resource' +# labels metric. By default the metric contains only name and namespace labels. +# To include additional annotations provide a list of resource names in their plural form and Kubernetes +# annotation keys you would like to allow for them (Example: '=namespaces=[kubernetes.io/team,...],pods=[kubernetes.io/team],...)'. +# A single '*' can be provided per resource instead to allow any annotations, but that has +# severe performance implications (Example: '=pods=[*]'). +metricAnnotationsAllowList: [] + # - pods=[k8s-annotation-1,k8s-annotation-n] + +# Available collectors for kube-state-metrics. +# By default, all available resources are enabled, comment out to disable. +collectors: + - certificatesigningrequests + - configmaps + - cronjobs + - daemonsets + - deployments + - endpoints + - horizontalpodautoscalers + - ingresses + - jobs + - leases + - limitranges + - mutatingwebhookconfigurations + - namespaces + - networkpolicies + - nodes + - persistentvolumeclaims + - persistentvolumes + - poddisruptionbudgets + - pods + - replicasets + - replicationcontrollers + - resourcequotas + - secrets + - services + - statefulsets + - storageclasses + - validatingwebhookconfigurations + - volumeattachments + +# Enabling kubeconfig will pass the --kubeconfig argument to the container +kubeconfig: + enabled: false + # base64 encoded kube-config file + secret: + +# Enabling support for customResourceState, will create a configMap including your config that will be read from kube-state-metrics +customResourceState: + enabled: false + # Add (Cluster)Role permissions to list/watch the customResources defined in the config to rbac.extraRules + config: {} + +# Enable only the release namespace for collecting resources. By default all namespaces are collected. +# If releaseNamespace and namespaces are both set a merged list will be collected. +releaseNamespace: false + +# Comma-separated list(string) or yaml list of namespaces to be enabled for collecting resources. By default all namespaces are collected. +namespaces: "" + +# Comma-separated list of namespaces not to be enabled. If namespaces and namespaces-denylist are both set, +# only namespaces that are excluded in namespaces-denylist will be used. +namespacesDenylist: "" + +## Override the deployment namespace +## +namespaceOverride: "" + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. 
If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. + # limits: + # cpu: 100m + # memory: 64Mi + # requests: + # cpu: 10m + # memory: 32Mi + +## Provide a k8s version to define apiGroups for podSecurityPolicy Cluster Role. +## For example: kubeTargetVersionOverride: 1.14.9 +## +kubeTargetVersionOverride: "" + +# Enable self metrics configuration for service and Service Monitor +# Default values for telemetry configuration can be overridden +# If you set telemetryNodePort, you must also set service.type to NodePort +selfMonitor: + enabled: false + # telemetryHost: 0.0.0.0 + # telemetryPort: 8081 + # telemetryNodePort: 0 + +# Enable vertical pod autoscaler support for kube-state-metrics +verticalPodAutoscaler: + enabled: false + + # Recommender responsible for generating recommendation for the object. + # List should be empty (then the default recommender will generate the recommendation) + # or contain exactly one recommender. + # recommenders: [] + # - name: custom-recommender-performance + + # List of resources that the vertical pod autoscaler can control. Defaults to cpu and memory + controlledResources: [] + # Specifies which resource values should be controlled: RequestsOnly or RequestsAndLimits. + # controlledValues: RequestsAndLimits + + # Define the max allowed resources for the pod + maxAllowed: {} + # cpu: 200m + # memory: 100Mi + # Define the min allowed resources for the pod + minAllowed: {} + # cpu: 200m + # memory: 100Mi + + # updatePolicy: + # Specifies minimal number of replicas which need to be alive for VPA Updater to attempt pod eviction + # minReplicas: 1 + # Specifies whether recommended updates are applied when a Pod is started and whether recommended updates + # are applied during the life of a Pod. Possible values are "Off", "Initial", "Recreate", and "Auto". + # updateMode: Auto + +# volumeMounts are used to add custom volume mounts to deployment. +# See example below +volumeMounts: [] +# - mountPath: /etc/config +# name: config-volume + +# volumes are used to add custom volumes to deployment +# See example below +volumes: [] +# - configMap: +# name: cm-for-volume +# name: config-volume + +# Extra manifests to deploy as an array +extraManifests: [] + # - apiVersion: v1 + # kind: ConfigMap + # metadata: + # labels: + # name: prometheus-extra + # data: + # extra-data: "value" + +## Containers allows injecting additional containers. +containers: [] + # - name: crd-init + # image: kiwigrid/k8s-sidecar:latest + +## InitContainers allows injecting additional initContainers. 
+initContainers: [] + # - name: crd-sidecar + # image: kiwigrid/k8s-sidecar:latest diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/Chart.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/Chart.yaml new file mode 100644 index 0000000..4db7d02 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/Chart.yaml @@ -0,0 +1,23 @@ +apiVersion: v2 +description: Prometheus Blackbox Exporter +name: prometheus-blackbox-exporter +version: 8.6.1 +appVersion: v0.24.0 +home: https://github.com/prometheus/blackbox_exporter +sources: + - https://github.com/prometheus/blackbox_exporter + - https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-blackbox-exporter +keywords: + - prometheus + - blackbox + - monitoring +maintainers: + - name: desaintmartin + email: cedric@desaintmartin.fr + - name: gianrubio + email: gianrubio@gmail.com + - name: rsotnychenko + email: me@sota.sh + - name: monotek + email: monotek23@gmail.com +type: application diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/README.md b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/README.md new file mode 100644 index 0000000..88043fc --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/README.md @@ -0,0 +1,113 @@ +# Prometheus Blackbox Exporter + +Prometheus exporter for blackbox testing + +Learn more: [https://github.com/prometheus/blackbox_exporter](https://github.com/prometheus/blackbox_exporter) + +This chart creates a Blackbox-Exporter deployment on a [Kubernetes](http://kubernetes.io) cluster using the [Helm](https://helm.sh) package manager. + +## Prerequisites + +- Kubernetes 1.8+ with Beta APIs enabled +- Helm >= 3.0 + +## Get Repository Info + +```console +helm repo add prometheus-community https://prometheus-community.github.io/helm-charts +helm repo update +``` + +_See [`helm repo`](https://helm.sh/docs/helm/helm_repo/) for command documentation._ + +## Install Chart + +```console +helm install [RELEASE_NAME] prometheus-community/prometheus-blackbox-exporter +``` + +_See [configuration](#configuration) below._ + +_See [helm install](https://helm.sh/docs/helm/helm_install/) for command documentation._ + +## Uninstall Chart + +```console +helm uninstall [RELEASE_NAME] +``` + +This removes all the Kubernetes components associated with the chart and deletes the release. + +_See [helm uninstall](https://helm.sh/docs/helm/helm_uninstall/) for command documentation._ + +## Upgrading Chart + +```console +helm upgrade [RELEASE_NAME] [CHART] --install +``` + +_See [helm upgrade](https://helm.sh/docs/helm/helm_upgrade/) for command documentation._ + +### To 8.0.0 + +- The default image is set to `quay.io/prometheus/blackbox-exporter` instead `prom/blackbox-exporter` +- `image.repository` is now split into `image.registry` and `image.repository`. + For the old behavior, set `image.registry` to an empty string and only use `image.repository`. + +### To 7.0.0 + +This version introduces the `securityContext` and `podSecurityContext` and removes `allowICMP`option. + +All previous values are setup as default. 
In case that you want to enable previous functionality for `allowICMP` you need to explicit enabled with the following configuration: + +```yaml +securityContext: + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + capabilities: + add: ["NET_RAW"] +``` + +### To 6.0.0 + +This version introduces the relabeling field for the ServiceMonitor. +All values in the list `additionalRelabeling` will now appear under `relabelings` instead of `metricRelabelings`. + +### To 5.0.0 + +This version removes Helm 2 support. Also the ingress config has changed, so you have to adapt to the example in the values.yaml. + +### To 4.0.0 + +This version create the service account by default and introduce pod security policy, it can be enabled by setting `pspEnabled: true`. + +### To 2.0.0 + +This version removes the `podDisruptionBudget.enabled` parameter and changes the default value of `podDisruptionBudget` to `{}`, in order to fix Helm 3 compatibility. + +In order to upgrade, please remove `podDisruptionBudget.enabled` from your custom values.yaml file and set the content of `podDisruptionBudget`, for example: + +```yaml +podDisruptionBudget: + maxUnavailable: 0 +``` + +### To 1.0.0 + +This version introduce the new recommended labels. + +In order to upgrade, delete the Deployment before upgrading: + +```bash +kubectl delete deployment [RELEASE_NAME]-prometheus-blackbox-exporter +``` + +Note that this will cause downtime of the blackbox. + +## Configuration + +See [Customizing the Chart Before Installing](https://helm.sh/docs/intro/using_helm/#customizing-the-chart-before-installing). To see all configurable options with detailed comments, visit the chart's [values.yaml](./values.yaml), or run these configuration commands: + +```console +helm show values prometheus-community/prometheus-blackbox-exporter +``` diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/ci/daemonset-values.yml b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/ci/daemonset-values.yml new file mode 100644 index 0000000..6ba1552 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/ci/daemonset-values.yml @@ -0,0 +1 @@ +kind: DaemonSet diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/ci/default-values.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/ci/default-values.yaml new file mode 100644 index 0000000..e69de29 diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/ci/extraenv-values.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/ci/extraenv-values.yaml new file mode 100644 index 0000000..3aa629f --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/ci/extraenv-values.yaml @@ -0,0 +1,3 @@ +extraEnv: + HTTP_PROXY: "http://superproxy.com:3128" + NO_PROXY: "localhost,127.0.0.1" diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/ci/hostAliases.yml b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/ci/hostAliases.yml new file mode 100644 index 0000000..59eb8d1 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/ci/hostAliases.yml @@ -0,0 +1,9 @@ +hostAliases: + - ip: 192.168.1.1 + hostNames: + - test.example.com + - another.example.net + - ip: 192.168.1.2 + hostNames: + - test2.example.com + - another2.example.net \ No newline at end of file diff --git 
a/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/ci/ingress-values.yml b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/ci/ingress-values.yml new file mode 100644 index 0000000..f1873fe --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/ci/ingress-values.yml @@ -0,0 +1,2 @@ +ingress: + enabled: true \ No newline at end of file diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/ci/networkpolicy-values.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/ci/networkpolicy-values.yaml new file mode 100644 index 0000000..db909ab --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/ci/networkpolicy-values.yaml @@ -0,0 +1,2 @@ +networkPolicy: + enabled: true diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/ci/secret-values.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/ci/secret-values.yaml new file mode 100644 index 0000000..504a152 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/ci/secret-values.yaml @@ -0,0 +1 @@ +secretConfig: true diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/NOTES.txt b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/NOTES.txt new file mode 100644 index 0000000..ccaccf0 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/NOTES.txt @@ -0,0 +1,31 @@ +See https://github.com/prometheus/blackbox_exporter/ for how to configure Prometheus and the Blackbox Exporter. + +1. Get the application URL by running these commands: +{{- if .Values.ingress.enabled }} +{{- range $host := .Values.ingress.hosts }} + {{- range .paths }} + http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }} + {{- end }} +{{- end }} + +{{- $kubeVersion := include "prometheus-blackbox-exporter.kubeVersion" . -}} +{{ if and .Values.ingress.className (semverCompare "<=1.18-0" $kubeVersion) }} +You've set ".Values.ingressClassName" but it's not supported by your Kubernetes version! +Therefore the option was not added and the old ingress annotation was set. +{{ end }} + +{{- else if contains "NodePort" .Values.service.type }} + export NODE_PORT=$(kubectl get --namespace {{ template "prometheus-blackbox-exporter.namespace" . }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "prometheus-blackbox-exporter.fullname" . }}) + export NODE_IP=$(kubectl get nodes --namespace {{ template "prometheus-blackbox-exporter.namespace" . }} -o jsonpath="{.items[0].status.addresses[0].address}") + echo http://$NODE_IP:$NODE_PORT +{{- else if contains "LoadBalancer" .Values.service.type }} + NOTE: It may take a few minutes for the LoadBalancer IP to be available. + You can watch the status of by running 'kubectl get --namespace {{ template "prometheus-blackbox-exporter.namespace" . }} svc -w {{ include "prometheus-blackbox-exporter.fullname" . }}' + export SERVICE_IP=$(kubectl get svc --namespace {{ template "prometheus-blackbox-exporter.namespace" . }} {{ include "prometheus-blackbox-exporter.fullname" . 
}} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}") + echo http://$SERVICE_IP:{{ .Values.service.port }} +{{- else if contains "ClusterIP" .Values.service.type }} + export POD_NAME=$(kubectl get pods --namespace {{ template "prometheus-blackbox-exporter.namespace" . }} -l "app.kubernetes.io/name={{ include "prometheus-blackbox-exporter.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") + export CONTAINER_PORT=$(kubectl get pod --namespace {{ template "prometheus-blackbox-exporter.namespace" . }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") + echo "Visit http://127.0.0.1:8080 to use your application" + kubectl --namespace {{ template "prometheus-blackbox-exporter.namespace" . }} port-forward $POD_NAME 8080:$CONTAINER_PORT +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/_helpers.tpl b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/_helpers.tpl new file mode 100644 index 0000000..763f5a8 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/_helpers.tpl @@ -0,0 +1,248 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "prometheus-blackbox-exporter.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "prometheus-blackbox-exporter.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "prometheus-blackbox-exporter.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "prometheus-blackbox-exporter.labels" -}} +helm.sh/chart: {{ include "prometheus-blackbox-exporter.chart" . }} +{{ include "prometheus-blackbox-exporter.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- if .Values.releaseLabel }} +release: {{ .Release.Name }} +{{- end }} +{{- if .Values.commonLabels }} +{{ toYaml .Values.commonLabels }} +{{- end }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "prometheus-blackbox-exporter.selectorLabels" -}} +app.kubernetes.io/name: {{ include "prometheus-blackbox-exporter.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "prometheus-blackbox-exporter.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "prometheus-blackbox-exporter.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} + +{{/* +Return the appropriate apiVersion for rbac. 
+*/}} +{{- define "rbac.apiVersion" -}} +{{- if .Capabilities.APIVersions.Has "rbac.authorization.k8s.io/v1" }} +{{- print "rbac.authorization.k8s.io/v1" -}} +{{- else -}} +{{- print "rbac.authorization.k8s.io/v1beta1" -}} +{{- end -}} +{{- end -}} + + +{{- define "prometheus-blackbox-exporter.namespace" -}} + {{- if .Values.namespaceOverride -}} + {{- .Values.namespaceOverride -}} + {{- else -}} + {{- .Release.Namespace -}} + {{- end -}} +{{- end -}} + +{{/* Enable overriding Kubernetes version for some use cases */}} +{{- define "prometheus-blackbox-exporter.kubeVersion" -}} + {{- default .Capabilities.KubeVersion.Version .Values.kubeVersionOverride -}} +{{- end -}} + + +{{/* +The image to use +*/}} +{{- define "prometheus-blackbox-exporter.image" -}} +{{- with (.Values.global.imageRegistry | default .Values.image.registry) -}}{{ . }}/{{- end }} +{{- .Values.image.repository -}}:{{- .Values.image.tag | default .Chart.AppVersion -}} +{{- with .Values.image.digest -}}@{{ .}}{{- end -}} +{{- end -}} + +{{/* +Define pod spec to be reused by highlevel resources (deployment, daemonset) +*/}} +{{- define "prometheus-blackbox-exporter.podSpec" -}} +automountServiceAccountToken: {{ .Values.automountServiceAccountToken }} +serviceAccountName: {{ template "prometheus-blackbox-exporter.serviceAccountName" . }} +{{- with .Values.topologySpreadConstraints }} +topologySpreadConstraints: +{{ toYaml . }} +{{- end }} +{{- with .Values.nodeSelector }} +nodeSelector: + {{- toYaml . | nindent 2 }} +{{- end }} +{{- with .Values.affinity }} +affinity: + {{- toYaml . | nindent 2 }} +{{- end }} +{{- with .Values.tolerations }} +tolerations: +{{ toYaml . }} +{{- end }} +{{- if .Values.image.pullSecrets }} +imagePullSecrets: +{{- range .Values.image.pullSecrets }} +- name: {{ . }} +{{- end }} +{{- end }} +{{- if .Values.hostAliases }} +hostAliases: +{{- range .Values.hostAliases }} +- ip: {{ .ip }} + hostnames: + {{- range .hostNames }} + - {{ . }} + {{- end }} +{{- end }} +{{- end }} +restartPolicy: {{ .Values.restartPolicy }} +{{- with .Values.priorityClassName }} +priorityClassName: "{{ . }}" +{{- end }} +{{- with .Values.podSecurityContext }} +securityContext: +{{ toYaml . | indent 2 }} +{{- end }} +{{- with .Values.extraInitContainers }} +initContainers: +{{ toYaml . }} +{{- end }} +containers: +{{ with .Values.extraContainers }} + {{- toYaml . }} +{{- end }} +- name: blackbox-exporter + image: {{ include "prometheus-blackbox-exporter.image" . }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- with .Values.securityContext }} + securityContext: + {{- toYaml . | nindent 4 }} + {{- end }} + env: + {{- range $key, $value := .Values.extraEnv }} + - name: {{ $key }} + value: {{ $value | quote }} + {{- end }} + args: + {{- if .Values.config }} + {{- if .Values.configPath }} + - "--config.file={{ .Values.configPath }}" + {{- else }} + - "--config.file=/config/blackbox.yaml" + {{- end }} + {{- else }} + - "--config.file=/etc/blackbox_exporter/config.yml" + {{- end }} + {{- with .Values.extraArgs }} +{{ tpl (toYaml .) $ | indent 2 }} + {{- end }} + {{- with .Values.resources }} + resources: +{{ toYaml . 
| indent 4 }} + {{- end }} + ports: + - containerPort: {{ .Values.containerPort }} + name: http + livenessProbe: + {{- toYaml .Values.livenessProbe | trim | nindent 4 }} + readinessProbe: + {{- toYaml .Values.readinessProbe | trim | nindent 4 }} + volumeMounts: + - mountPath: /config + name: config + {{- range .Values.extraConfigmapMounts }} + - name: {{ .name }} + mountPath: {{ .mountPath }} + subPath: {{ .subPath | default "" }} + readOnly: {{ .readOnly }} + {{- end }} + {{- range .Values.extraSecretMounts }} + - name: {{ .name }} + mountPath: {{ .mountPath }} + subPath: {{ .subPath }} + readOnly: {{ .readOnly }} + {{- end }} + {{- if .Values.extraVolumeMounts }} +{{ toYaml .Values.extraVolumeMounts | indent 2 }} + {{- end }} + {{- if .Values.dnsPolicy }} +dnsPolicy: {{ .Values.dnsPolicy | toString }} +{{- end }} +hostNetwork: {{ .Values.hostNetwork }} +{{- with .Values.dnsConfig }} +dnsConfig: + {{- toYaml . | nindent 2 }} +{{- end }} +volumes: +{{- if .Values.extraVolumes }} +{{ toYaml .Values.extraVolumes }} +{{- end }} +- name: config +{{- if .Values.secretConfig }} + secret: + secretName: {{ template "prometheus-blackbox-exporter.fullname" . }} +{{- else if .Values.configExistingSecretName }} + secret: + secretName: {{ .Values.configExistingSecretName }} +{{- else }} + configMap: + name: {{ template "prometheus-blackbox-exporter.fullname" . }} +{{- end }} +{{- range .Values.extraConfigmapMounts }} +- name: {{ .name }} + configMap: + name: {{ .configMap }} + defaultMode: {{ .defaultMode }} +{{- end }} +{{- range .Values.extraSecretMounts }} +- name: {{ .name }} + secret: + secretName: {{ .secretName }} + defaultMode: {{ .defaultMode }} +{{- end }} +{{- end -}} diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/configmap.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/configmap.yaml new file mode 100644 index 0000000..5490b14 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/configmap.yaml @@ -0,0 +1,12 @@ +{{- if and .Values.config (eq .Values.configExistingSecretName "") }} +apiVersion: v1 +kind: {{ if .Values.secretConfig -}} Secret {{- else -}} ConfigMap {{- end }} +metadata: + name: {{ template "prometheus-blackbox-exporter.fullname" . }} + namespace: {{ template "prometheus-blackbox-exporter.namespace" . }} + labels: + {{- include "prometheus-blackbox-exporter.labels" . | nindent 4 }} +{{ if .Values.secretConfig -}} stringData: {{- else -}} data: {{- end }} + blackbox.yaml: | +{{ toYaml .Values.config | indent 4 }} +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/daemonset.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/daemonset.yaml new file mode 100644 index 0000000..cb36f9f --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/daemonset.yaml @@ -0,0 +1,27 @@ +{{- if (eq .Values.kind "DaemonSet") }} +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: {{ template "prometheus-blackbox-exporter.fullname" . }} + namespace: {{ template "prometheus-blackbox-exporter.namespace" . }} + labels: + {{- include "prometheus-blackbox-exporter.labels" . | nindent 4 }} +spec: + selector: + matchLabels: + {{- include "prometheus-blackbox-exporter.selectorLabels" . | nindent 6 }} + template: + metadata: + labels: + {{- include "prometheus-blackbox-exporter.labels" . 
| nindent 8 }} + {{- if .Values.pod.labels }} +{{ toYaml .Values.pod.labels | indent 8 }} + {{- end }} + annotations: + checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }} + {{- if .Values.podAnnotations }} +{{ toYaml .Values.podAnnotations | indent 8 }} + {{- end }} + spec: + {{- include "prometheus-blackbox-exporter.podSpec" . | nindent 6 }} +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/deployment.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/deployment.yaml new file mode 100644 index 0000000..607de44 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/deployment.yaml @@ -0,0 +1,30 @@ +{{- if (eq .Values.kind "Deployment") }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "prometheus-blackbox-exporter.fullname" . }} + namespace: {{ template "prometheus-blackbox-exporter.namespace" . }} + labels: + {{- include "prometheus-blackbox-exporter.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicas }} + selector: + matchLabels: + {{- include "prometheus-blackbox-exporter.selectorLabels" . | nindent 6 }} + strategy: +{{ toYaml .Values.strategy | indent 4 }} + template: + metadata: + labels: + {{- include "prometheus-blackbox-exporter.labels" . | nindent 8 }} + {{- if .Values.pod.labels }} +{{ toYaml .Values.pod.labels | indent 8 }} + {{- end }} + annotations: + checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }} + {{- if .Values.podAnnotations }} +{{ toYaml .Values.podAnnotations | indent 8 }} + {{- end }} + spec: + {{- include "prometheus-blackbox-exporter.podSpec" . | nindent 6 }} +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/extra-manifests.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/extra-manifests.yaml new file mode 100644 index 0000000..f0c859e --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/extra-manifests.yaml @@ -0,0 +1,4 @@ +{{ range .Values.extraManifests }} +--- +{{ tpl (toYaml .) $ }} +{{ end }} diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/ingress.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/ingress.yaml new file mode 100644 index 0000000..394baa1 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/ingress.yaml @@ -0,0 +1,66 @@ +{{- if .Values.ingress.enabled -}} +{{- $fullName := include "prometheus-blackbox-exporter.fullname" . -}} +{{- $svcPort := .Values.service.port -}} +{{- $kubeVersion := include "prometheus-blackbox-exporter.kubeVersion" . -}} +{{- if and .Values.ingress.className (not (semverCompare ">=1.18-0" $kubeVersion)) }} + {{- if not (hasKey .Values.ingress.annotations "kubernetes.io/ingress.class") }} + {{- $_ := set .Values.ingress.annotations "kubernetes.io/ingress.class" .Values.ingress.className}} + {{- end }} +{{- end }} +{{- if semverCompare ">=1.19-0" $kubeVersion -}} +apiVersion: networking.k8s.io/v1 +{{- else if semverCompare ">=1.14-0" $kubeVersion -}} +apiVersion: networking.k8s.io/v1beta1 +{{- else -}} +apiVersion: extensions/v1beta1 +{{- end }} +kind: Ingress +metadata: + name: {{ $fullName }} + namespace: {{ template "prometheus-blackbox-exporter.namespace" . 
}} + labels: + {{- include "prometheus-blackbox-exporter.labels" . | nindent 4 }} + {{- if .Values.ingress.labels }} +{{ toYaml .Values.ingress.labels | indent 4 }} + {{- end}} + {{- with .Values.ingress.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + {{- if and .Values.ingress.className (semverCompare ">=1.18-0" $kubeVersion) }} + ingressClassName: {{ .Values.ingress.className }} + {{- end }} + {{- if .Values.ingress.tls }} + tls: + {{- range .Values.ingress.tls }} + - hosts: + {{- range .hosts }} + - {{ tpl . $ | quote }} + {{- end }} + secretName: {{ .secretName }} + {{- end }} + {{- end }} + rules: + {{- range .Values.ingress.hosts }} + - host: {{ tpl .host $ | quote }} + http: + paths: + {{- range .paths }} + - path: {{ .path }} + {{- if and .pathType (semverCompare ">=1.18-0" $kubeVersion) }} + pathType: {{ .pathType }} + {{- end }} + backend: + {{- if semverCompare ">=1.19-0" $kubeVersion }} + service: + name: {{ $fullName }} + port: + number: {{ $svcPort }} + {{- else }} + serviceName: {{ $fullName }} + servicePort: {{ $svcPort }} + {{- end }} + {{- end }} + {{- end }} +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/networkpolicy.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/networkpolicy.yaml new file mode 100644 index 0000000..0ada670 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/networkpolicy.yaml @@ -0,0 +1,28 @@ +{{- if .Values.networkPolicy.enabled }} +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: {{ template "prometheus-blackbox-exporter.fullname" . }} + namespace: {{ template "prometheus-blackbox-exporter.namespace" . }} + labels: + {{- include "prometheus-blackbox-exporter.labels" . | nindent 4 }} +spec: + podSelector: + matchLabels: + {{- include "prometheus-blackbox-exporter.selectorLabels" . | nindent 6 }} + ingress: +{{- if .Values.networkPolicy.allowMonitoringNamespace }} + - from: + - namespaceSelector: + matchLabels: + name: monitoring + ports: + - port: {{ .Values.service.port }} + protocol: TCP +{{- else }} + - {} +{{- end }} + policyTypes: + - Ingress +{{- end }} + diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/poddisruptionbudget.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/poddisruptionbudget.yaml new file mode 100644 index 0000000..7783dd7 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/poddisruptionbudget.yaml @@ -0,0 +1,18 @@ +{{- if .Values.podDisruptionBudget -}} +{{ if $.Capabilities.APIVersions.Has "policy/v1/PodDisruptionBudget" -}} +apiVersion: policy/v1 +{{- else -}} +apiVersion: policy/v1beta1 +{{- end }} +kind: PodDisruptionBudget +metadata: + name: {{ template "prometheus-blackbox-exporter.fullname" . }} + namespace: {{ template "prometheus-blackbox-exporter.namespace" . }} + labels: + {{- include "prometheus-blackbox-exporter.labels" . | nindent 4 }} +spec: + selector: + matchLabels: + {{- include "prometheus-blackbox-exporter.selectorLabels" . 
| nindent 6 }} +{{ toYaml .Values.podDisruptionBudget | indent 2 }} +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/podmonitoring.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/podmonitoring.yaml new file mode 100644 index 0000000..445b973 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/podmonitoring.yaml @@ -0,0 +1,49 @@ +{{- if .Values.podMonitoring.enabled }} +{{- range .Values.podMonitoring.targets }} +--- +apiVersion: monitoring.googleapis.com/v1 +kind: PodMonitoring +metadata: + name: {{ template "prometheus-blackbox-exporter.fullname" $ }}-{{ .name }} + namespace: {{ template "prometheus-blackbox-exporter.namespace" $ }} + labels: + {{- include "prometheus-blackbox-exporter.labels" $ | nindent 4 }} + {{- if or $.Values.podMonitoring.defaults.labels .labels }} + {{- toYaml (.labels | default $.Values.podMonitoring.defaults.labels) | nindent 4 }} + {{- end }} +spec: + endpoints: + - port: http + scheme: {{ $.Values.podMonitoring.scheme }} + {{- if $.Values.podMonitoring.tlsConfig }} + tls: {{ toYaml $.Values.podMonitoring.tlsConfig | nindent 6 }} + {{- end }} + path: {{ $.Values.podMonitoring.path }} + interval: {{ .interval | default $.Values.podMonitoring.defaults.interval }} + timeout: {{ .scrapeTimeout | default $.Values.podMonitoring.defaults.scrapeTimeout }} + params: + module: + - {{ .module | default $.Values.podMonitoring.defaults.module }} + target: + - {{ .url }} + {{- if .hostname }} + hostname: + - {{ .hostname }} + {{- end }} + metricRelabeling: + - action: replace + targetLabel: target + replacement: {{ .url }} + - action: replace + targetLabel: name + replacement: {{ .name }} + {{- range $targetLabel, $replacement := .additionalMetricsRelabels | default $.Values.podMonitoring.defaults.additionalMetricsRelabels }} + - action: replace + targetLabel: {{ $targetLabel | quote }} + replacement: {{ $replacement | quote }} + {{- end }} + selector: + matchLabels: + {{- include "prometheus-blackbox-exporter.selectorLabels" $ | nindent 6 }} +{{- end }} +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/podsecuritypolicy.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/podsecuritypolicy.yaml new file mode 100644 index 0000000..05c67ef --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/podsecuritypolicy.yaml @@ -0,0 +1,41 @@ +{{- if and .Values.pspEnabled (.Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy") }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "prometheus-blackbox-exporter.fullname" . }}-psp + namespace: {{ template "prometheus-blackbox-exporter.namespace" . }} + labels: + {{- include "prometheus-blackbox-exporter.labels" . | nindent 4 }} +spec: + # Prevents running in privileged mode + privileged: false + # Required to prevent escalations to root. + allowPrivilegeEscalation: false + volumes: + - configMap + - secret + hostNetwork: false + hostIPC: false + hostPID: false + runAsUser: + rule: RunAsAny + seLinux: + rule: RunAsAny + supplementalGroups: + rule: 'MustRunAs' + ranges: + # Forbid adding the root group. + - min: 1 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + # Forbid adding the root group. 
+ - min: 1 + max: 65535 + readOnlyRootFilesystem: {{ .Values.readOnlyRootFilesystem }} + {{- if has "NET_RAW" .Values.securityContext.capabilities.add }} + allowedCapabilities: + - NET_RAW + {{- end }} +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/prometheusrule.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/prometheusrule.yaml new file mode 100644 index 0000000..fd9336f --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/prometheusrule.yaml @@ -0,0 +1,20 @@ +{{- if .Values.prometheusRule.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ template "prometheus-blackbox-exporter.fullname" . }} + {{- with .Values.prometheusRule.namespace }} + namespace: {{ . }} + {{- end }} + labels: + {{- include "prometheus-blackbox-exporter.labels" . | nindent 4 }} + {{- with .Values.prometheusRule.additionalLabels -}} +{{- toYaml . | nindent 4 -}} + {{- end }} +spec: + {{- with .Values.prometheusRule.rules }} + groups: + - name: {{ template "prometheus-blackbox-exporter.name" $ }} + rules: {{ toYaml . | nindent 8 }} + {{- end }} +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/role.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/role.yaml new file mode 100644 index 0000000..3291de4 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/role.yaml @@ -0,0 +1,18 @@ +{{- if and .Values.pspEnabled (.Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy") }} +apiVersion: {{ template "rbac.apiVersion" . }} +kind: Role +metadata: + labels: + {{- include "prometheus-blackbox-exporter.labels" . | nindent 4 }} + name: {{ template "prometheus-blackbox-exporter.fullname" . }} + namespace: {{ template "prometheus-blackbox-exporter.namespace" . }} +rules: + - apiGroups: + - policy + resources: + - podsecuritypolicies + resourceNames: + - {{ template "prometheus-blackbox-exporter.fullname" . }}-psp + verbs: + - use +{{- end -}} diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/rolebinding.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/rolebinding.yaml new file mode 100644 index 0000000..51dabb0 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/rolebinding.yaml @@ -0,0 +1,16 @@ +{{- if and .Values.pspEnabled (.Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy") }} +apiVersion: {{ template "rbac.apiVersion" . }} +kind: RoleBinding +metadata: + labels: + {{- include "prometheus-blackbox-exporter.labels" . | nindent 4 }} + name: {{ template "prometheus-blackbox-exporter.fullname" . }} + namespace: {{ template "prometheus-blackbox-exporter.namespace" . }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: {{ template "prometheus-blackbox-exporter.fullname" . }} +subjects: + - kind: ServiceAccount + name: {{ template "prometheus-blackbox-exporter.serviceAccountName" . 
}} +{{- end -}} diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/selfpodmonitoring.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/selfpodmonitoring.yaml new file mode 100644 index 0000000..796090a --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/selfpodmonitoring.yaml @@ -0,0 +1,32 @@ +{{- if .Values.podMonitoring.selfMonitor.enabled }} +--- +apiVersion: monitoring.googleapis.com/v1 +kind: PodMonitoring +metadata: + name: {{ template "prometheus-blackbox-exporter.fullname" $ }} + namespace: {{ template "prometheus-blackbox-exporter.namespace" $ }} + labels: + {{- include "prometheus-blackbox-exporter.labels" $ | nindent 4 }} + {{- if .Values.podMonitoring.selfMonitor.labels }} + {{- toYaml (.Values.podMonitoring.selfMonitor.labels) | nindent 4 }} + {{- end }} +spec: + endpoints: + - port: http + scheme: {{ $.Values.podMonitoring.scheme }} + path: {{ .Values.podMonitoring.selfMonitor.path }} + interval: {{ .Values.podMonitoring.selfMonitor.interval }} + timeout: {{ .Values.podMonitoring.selfMonitor.scrapeTimeout }} + +{{- if .Values.podMonitoring.selfMonitor.additionalMetricsRelabels }} + metricRelabeling: + {{- range $targetLabel, $replacement := .Values.podMonitoring.selfMonitor.additionalMetricsRelabels | default $.Values.podMonitoring.defaults.additionalMetricsRelabels }} + - action: replace + targetLabel: {{ $targetLabel | quote }} + replacement: {{ $replacement | quote }} + {{- end }} +{{- end }} + selector: + matchLabels: + {{- include "prometheus-blackbox-exporter.selectorLabels" $ | nindent 6 }} +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/selfservicemonitor.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/selfservicemonitor.yaml new file mode 100644 index 0000000..bc28697 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/selfservicemonitor.yaml @@ -0,0 +1,31 @@ +{{- if .Values.serviceMonitor.selfMonitor.enabled }} +--- +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "prometheus-blackbox-exporter.fullname" $ }} + namespace: {{ template "prometheus-blackbox-exporter.namespace" $ }} + labels: + {{- include "prometheus-blackbox-exporter.labels" $ | nindent 4 }} + {{- if .Values.serviceMonitor.selfMonitor.labels }} + {{- toYaml (.Values.serviceMonitor.selfMonitor.labels) | nindent 4 }} + {{- end }} +spec: + endpoints: + - path: {{ .Values.serviceMonitor.selfMonitor.path }} + interval: {{ .Values.serviceMonitor.selfMonitor.interval }} + scrapeTimeout: {{ .Values.serviceMonitor.selfMonitor.scrapeTimeout }} + scheme: http + +{{- if .Values.serviceMonitor.selfMonitor.additionalRelabeling }} + relabelings: +{{ toYaml .Values.serviceMonitor.selfMonitor.additionalRelabeling | indent 6 }} +{{- end }} + jobLabel: "{{ .Release.Name }}" + selector: + matchLabels: + {{- include "prometheus-blackbox-exporter.selectorLabels" $ | nindent 6 }} + namespaceSelector: + matchNames: + - {{ template "prometheus-blackbox-exporter.namespace" $ }} +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/service.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/service.yaml new file mode 100644 index 0000000..e45cbd0 --- /dev/null +++ 
b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/service.yaml @@ -0,0 +1,31 @@ +apiVersion: v1 +kind: Service +metadata: +{{- if .Values.service.annotations }} + annotations: + {{- toYaml .Values.service.annotations | nindent 4 }} +{{- end }} + name: {{ include "prometheus-blackbox-exporter.fullname" . }} + namespace: {{ template "prometheus-blackbox-exporter.namespace" . }} + labels: + {{- include "prometheus-blackbox-exporter.labels" . | nindent 4 }} +{{- if .Values.service.labels }} +{{ toYaml .Values.service.labels | indent 4 }} +{{- end }} +spec: +{{- if .Values.service.ipDualStack.enabled }} + ipFamilies: {{ toYaml .Values.service.ipDualStack.ipFamilies | nindent 4 }} + ipFamilyPolicy: {{ .Values.service.ipDualStack.ipFamilyPolicy }} +{{- end }} + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: http + protocol: TCP + name: http +{{- if .Values.service.externalIPs }} + externalIPs: +{{ toYaml .Values.service.externalIPs | indent 4 }} +{{- end }} + selector: + {{- include "prometheus-blackbox-exporter.selectorLabels" . | nindent 4 }} diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/serviceaccount.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/serviceaccount.yaml new file mode 100644 index 0000000..717671d --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/serviceaccount.yaml @@ -0,0 +1,13 @@ +{{- if .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "prometheus-blackbox-exporter.serviceAccountName" . }} + namespace: {{ template "prometheus-blackbox-exporter.namespace" . }} + labels: + {{- include "prometheus-blackbox-exporter.labels" . | nindent 4 }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . 
| nindent 4 }} + {{- end }} +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/servicemonitor.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/servicemonitor.yaml new file mode 100644 index 0000000..4372b5e --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/servicemonitor.yaml @@ -0,0 +1,62 @@ +{{- if .Values.serviceMonitor.enabled }} +{{- range .Values.serviceMonitor.targets }} +--- +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "prometheus-blackbox-exporter.fullname" $ }}-{{ .name }} + namespace: {{ template "prometheus-blackbox-exporter.namespace" $ }} + labels: + {{- include "prometheus-blackbox-exporter.labels" $ | nindent 4 }} + {{- if or $.Values.serviceMonitor.defaults.labels .labels }} + {{- toYaml (.labels | default $.Values.serviceMonitor.defaults.labels) | nindent 4 }} + {{- end }} +spec: + endpoints: + - port: http + scheme: {{ $.Values.serviceMonitor.scheme }} + {{- if $.Values.serviceMonitor.bearerTokenFile }} + bearerTokenFile: {{ $.Values.serviceMonitor.bearerTokenFile }} + {{- end }} + {{- if $.Values.serviceMonitor.tlsConfig }} + tlsConfig: {{ toYaml $.Values.serviceMonitor.tlsConfig | nindent 6 }} + {{- end }} + path: {{ $.Values.serviceMonitor.path }} + interval: {{ .interval | default $.Values.serviceMonitor.defaults.interval }} + scrapeTimeout: {{ .scrapeTimeout | default $.Values.serviceMonitor.defaults.scrapeTimeout }} + params: + module: + - {{ .module | default $.Values.serviceMonitor.defaults.module }} + target: + - {{ .url }} + {{- if .hostname }} + hostname: + - {{ .hostname }} + {{- end }} + metricRelabelings: + - sourceLabels: [instance] + targetLabel: instance + replacement: {{ .url }} + action: replace + - sourceLabels: [target] + targetLabel: target + replacement: {{ .name }} + action: replace + {{- range $targetLabel, $replacement := .additionalMetricsRelabels | default $.Values.serviceMonitor.defaults.additionalMetricsRelabels }} + - targetLabel: {{ $targetLabel | quote }} + replacement: {{ $replacement | quote }} + action: replace + {{- end }} +{{- if concat (.additionalRelabeling | default list) $.Values.serviceMonitor.defaults.additionalRelabeling }} + relabelings: +{{ toYaml (concat (.additionalRelabeling | default list) $.Values.serviceMonitor.defaults.additionalRelabeling) | indent 6 }} +{{- end }} + jobLabel: "{{ $.Release.Name }}" + selector: + matchLabels: + {{- include "prometheus-blackbox-exporter.selectorLabels" $ | nindent 6 }} + namespaceSelector: + matchNames: + - {{ template "prometheus-blackbox-exporter.namespace" $ }} +{{- end }} +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/verticalpodautoscaler.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/verticalpodautoscaler.yaml new file mode 100644 index 0000000..a62d067 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/templates/verticalpodautoscaler.yaml @@ -0,0 +1,44 @@ +{{- if and (.Capabilities.APIVersions.Has "autoscaling.k8s.io/v1") (.Values.verticalPodAutoscaler.enabled) }} +apiVersion: autoscaling.k8s.io/v1 +kind: VerticalPodAutoscaler +metadata: + name: {{ include "prometheus-blackbox-exporter.fullname" . }} + namespace: {{ template "prometheus-blackbox-exporter.namespace" . }} + labels: + {{- include "prometheus-blackbox-exporter.labels" . 
| nindent 4 }} +spec: + {{- with .Values.verticalPodAutoscaler.recommenders }} + recommenders: + {{- toYaml . | nindent 4 }} + {{- end }} + resourcePolicy: + containerPolicies: + - containerName: blackbox-exporter + {{- with .Values.verticalPodAutoscaler.controlledResources }} + controlledResources: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.verticalPodAutoscaler.controlledValues }} + controlledValues: {{ .Values.verticalPodAutoscaler.controlledValues }} + {{- end }} + {{- if .Values.verticalPodAutoscaler.maxAllowed }} + maxAllowed: + {{ toYaml .Values.verticalPodAutoscaler.maxAllowed | nindent 8 }} + {{- end }} + {{- if .Values.verticalPodAutoscaler.minAllowed }} + minAllowed: + {{ toYaml .Values.verticalPodAutoscaler.minAllowed | nindent 8 }} + {{- end }} + targetRef: + apiVersion: apps/v1 + {{- if (eq .Values.kind "DaemonSet") }} + kind: DaemonSet + {{- else }} + kind: Deployment + {{- end }} + name: {{ template "prometheus-blackbox-exporter.fullname" . }} + {{- with .Values.verticalPodAutoscaler.updatePolicy }} + updatePolicy: + {{- toYaml . | nindent 4 }} + {{- end }} +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/values.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/values.yaml new file mode 100644 index 0000000..ff119a5 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-blackbox-exporter/values.yaml @@ -0,0 +1,399 @@ +global: + ## Global image registry to use if it needs to be overriden for some specific use cases (e.g local registries, custom images, ...) + ## + imageRegistry: "" + +restartPolicy: Always + +kind: Deployment + +## Override the namespace +## +namespaceOverride: "" + +# Override Kubernetes version if your distribution does not follow semver v2 +kubeVersionOverride: "" + +## set to true to add the release label so scraping of the servicemonitor with kube-prometheus-stack works out of the box +releaseLabel: false + +podDisruptionBudget: {} + # maxUnavailable: 0 + +## Allow automount the serviceaccount token for sidecar container (eg: oauthproxy) +automountServiceAccountToken: false + +## Additional blackbox-exporter container environment variables +## For instance to add a http_proxy +## +## extraEnv: +## HTTP_PROXY: "http://superproxy.com:3128" +## NO_PROXY: "localhost,127.0.0.1" +extraEnv: {} + +extraVolumes: [] + # - name: secret-blackbox-oauth-htpasswd + # secret: + # defaultMode: 420 + # secretName: blackbox-oauth-htpasswd + # - name: storage-volume + # persistentVolumeClaim: + # claimName: example + +## Additional volumes that will be attached to the blackbox-exporter container +extraVolumeMounts: + # - name: ca-certs + # mountPath: /etc/ssl/certs/ca-certificates.crt + +## Additional InitContainers to initialize the pod +## +extraInitContainers: [] + +extraContainers: [] + # - name: oAuth2-proxy + # args: + # - -https-address=:9116 + # - -upstream=http://localhost:9115 + # - -skip-auth-regex=^/metrics + # - -openshift-delegate-urls={"/":{"group":"monitoring.coreos.com","resource":"prometheuses","verb":"get"}} + # image: openshift/oauth-proxy:v1.1.0 + # ports: + # - containerPort: 9116 + # name: proxy + # resources: + # limits: + # memory: 16Mi + # requests: + # memory: 4Mi + # cpu: 20m + # volumeMounts: + # - mountPath: /etc/prometheus/secrets/blackbox-tls + # name: secret-blackbox-tls + +## Enable pod security policy +pspEnabled: true + +hostNetwork: false + +strategy: + rollingUpdate: + maxSurge: 1 + maxUnavailable: 0 + type: 
RollingUpdate + +image: + registry: quay.io + repository: prometheus/blackbox-exporter + # Overrides the image tag whose default is {{ printf "v%s" .Chart.AppVersion }} + tag: "" + pullPolicy: IfNotPresent + digest: "" + + ## Optionally specify an array of imagePullSecrets. + ## Secrets must be manually created in the namespace. + ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/ + ## + # pullSecrets: + # - myRegistrKeySecretName + +podSecurityContext: {} +# fsGroup: 1000 + +## User and Group to run blackbox-exporter container as +securityContext: + runAsUser: 1000 + runAsGroup: 1000 + readOnlyRootFilesystem: true + runAsNonRoot: true + allowPrivilegeEscalation: false + capabilities: + drop: ["ALL"] +# Add NET_RAW to enable ICMP +# add: ["NET_RAW"] + +livenessProbe: + httpGet: + path: /-/healthy + port: http + failureThreshold: 3 + +readinessProbe: + httpGet: + path: /-/healthy + port: http + +nodeSelector: {} +tolerations: [] +affinity: {} + +## Topology spread constraints rely on node labels to identify the topology domain(s) that each Node is in. +## Ref: https://kubernetes.io/docs/concepts/workloads/pods/pod-topology-spread-constraints/ +topologySpreadConstraints: [] + # - maxSkew: 1 + # topologyKey: failure-domain.beta.kubernetes.io/zone + # whenUnsatisfiable: DoNotSchedule + # labelSelector: + # matchLabels: +# app.kubernetes.io/instance: jiralert + +# if the configuration is managed as secret outside the chart, using SealedSecret for example, +# provide the name of the secret here. If secretConfig is set to true, configExistingSecretName will be ignored +# in favor of the config value. +configExistingSecretName: "" +# Store the configuration as a `Secret` instead of a `ConfigMap`, useful in case it contains sensitive data +secretConfig: false +config: + modules: + http_2xx: + prober: http + timeout: 5s + http: + valid_http_versions: ["HTTP/1.1", "HTTP/2.0"] + follow_redirects: true + preferred_ip_protocol: "ip4" + +# Set custom config path, other than default /config/blackbox.yaml. If let empty, path will be "/config/blackbox.yaml" +# configPath: "/foo/bar" + +extraConfigmapMounts: [] + # - name: certs-configmap + # mountPath: /etc/secrets/ssl/ + # subPath: certificates.crt # (optional) + # configMap: certs-configmap + # readOnly: true + # defaultMode: 420 + +## Additional secret mounts +# Defines additional mounts with secrets. Secrets must be manually created in the namespace. +extraSecretMounts: [] + # - name: secret-files + # mountPath: /etc/secrets + # secretName: blackbox-secret-files + # readOnly: true + # defaultMode: 420 + +resources: {} + # limits: + # memory: 300Mi + # requests: + # memory: 50Mi + +priorityClassName: "" + +service: + annotations: {} + labels: {} + type: ClusterIP + port: 9115 + ipDualStack: + enabled: false + ipFamilies: ["IPv6", "IPv4"] + ipFamilyPolicy: "PreferDualStack" + +# Only changes container port. Application port can be changed with extraArgs (--web.listen-address=:9115) +# https://github.com/prometheus/blackbox_exporter/blob/998037b5b40c1de5fee348ffdea8820509d85171/main.go#L55 +containerPort: 9115 + +serviceAccount: + # Specifies whether a ServiceAccount should be created + create: true + # The name of the ServiceAccount to use. 
+ # If not set and create is true, a name is generated using the fullname template + name: + annotations: {} + +## An Ingress resource can provide name-based virtual hosting and TLS +## termination among other things for CouchDB deployments which are accessed +## from outside the Kubernetes cluster. +## ref: https://kubernetes.io/docs/concepts/services-networking/ingress/ +ingress: + enabled: false + className: "" + labels: {} + annotations: {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + hosts: + ## The host property on hosts and tls is passed through helm tpl function. + ## ref: https://helm.sh/docs/developing_charts/#using-the-tpl-function + - host: chart-example.local + paths: + - path: / + pathType: ImplementationSpecific + tls: [] + # - secretName: chart-example-tls + # hosts: + # - chart-example.local + +podAnnotations: {} + +# Hostaliases allow to add additional DNS entries to be injected directly into pods. +# This will take precedence over your implemented DNS solution +hostAliases: [] +# - ip: 192.168.1.1 +# hostNames: +# - test.example.com +# - another.example.net + +pod: + labels: {} + +extraArgs: [] + # - --history.limit=1000 + +replicas: 1 + +serviceMonitor: + ## If true, a ServiceMonitor CRD is created for a prometheus operator + ## https://github.com/coreos/prometheus-operator for blackbox-exporter itself + ## + selfMonitor: + enabled: false + additionalMetricsRelabels: {} + additionalRelabeling: [] + labels: {} + path: /metrics + interval: 30s + scrapeTimeout: 30s + + ## If true, a ServiceMonitor CRD is created for a prometheus operator + ## https://github.com/coreos/prometheus-operator for each target + ## + enabled: false + + # Default values that will be used for all ServiceMonitors created by `targets` + defaults: + additionalMetricsRelabels: {} + additionalRelabeling: [] + labels: {} + interval: 30s + scrapeTimeout: 30s + module: http_2xx + ## scheme: HTTP scheme to use for scraping. Can be used with `tlsConfig` for example if using istio mTLS. + scheme: http + ## path: HTTP path. Needs to be adjusted, if web.route-prefix is set + path: "/probe" + ## tlsConfig: TLS configuration to use when scraping the endpoint. For example if using istio mTLS. + ## Of type: https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#tlsconfig + tlsConfig: {} + bearerTokenFile: + + targets: +# - name: example # Human readable URL that will appear in Prometheus / AlertManager +# url: http://example.com/healthz # The URL that blackbox will scrape +# hostname: example.com # HTTP probes can accept an additional `hostname` parameter that will set `Host` header and TLS SNI +# labels: {} # Map of labels for ServiceMonitor. Overrides value set in `defaults` +# interval: 60s # Scraping interval. Overrides value set in `defaults` +# scrapeTimeout: 60s # Scrape timeout. Overrides value set in `defaults` +# module: http_2xx # Module used for scraping. 
Overrides value set in `defaults` +# additionalMetricsRelabels: {} # Map of metric labels and values to add +# additionalRelabeling: [] # List of metric relabeling actions to run + +## Custom PrometheusRules to be defined +## ref: https://github.com/coreos/prometheus-operator#customresourcedefinitions +prometheusRule: + enabled: false + additionalLabels: {} + namespace: "" + rules: [] + +podMonitoring: + ## If true, a PodMonitoring CR is created for google managed prometheus + ## https://cloud.google.com/stackdriver/docs/managed-prometheus/setup-managed#gmp-pod-monitoring for blackbox-exporter itself + ## + selfMonitor: + enabled: false + additionalMetricsRelabels: {} + labels: {} + path: /metrics + interval: 30s + scrapeTimeout: 30s + + ## If true, a PodMonitoring CR is created for a google managed prometheus + ## https://cloud.google.com/stackdriver/docs/managed-prometheus/setup-managed#gmp-pod-monitoring for each target + ## + enabled: false + + ## Default values that will be used for all PodMonitoring created by `targets` + ## Following PodMonitoring API specs https://github.com/GoogleCloudPlatform/prometheus-engine/blob/main/doc/api.md#scrapeendpoint + defaults: + additionalMetricsRelabels: {} + labels: {} + interval: 30s + scrapeTimeout: 30s + module: http_2xx + ## scheme: Protocol scheme to use to scrape. + scheme: http + ## path: HTTP path. Needs to be adjusted, if web.route-prefix is set + path: "/probe" + ## tlsConfig: TLS configuration to use when scraping the endpoint. For example if using istio mTLS. + ## Of type: https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#tlsconfig + tlsConfig: {} + + targets: +# - name: example # Human readable URL that will appear in Google Managed Prometheus / AlertManager +# url: http://example.com/healthz # The URL that blackbox will scrape +# hostname: example.com # HTTP probes can accept an additional `hostname` parameter that will set `Host` header and TLS SNI +# labels: {} # Map of labels for PodMonitoring. Overrides value set in `defaults` +# interval: 60s # Scraping interval. Overrides value set in `defaults` +# scrapeTimeout: 60s # Scrape timeout. Overrides value set in `defaults` +# module: http_2xx # Module used for scraping. Overrides value set in `defaults` +# additionalMetricsRelabels: {} # Map of metric labels and values to add + +## Network policy for chart +networkPolicy: + # Enable network policy and allow access from anywhere + enabled: false + # Limit access only from monitoring namespace + # Before setting this value to true, you must add the name=monitoring label to the monitoring namespace + # Network Policy uses label filtering + allowMonitoringNamespace: false + +## dnsPolicy and dnsConfig for Deployments and Daemonsets if you want non-default settings. +## These will be passed directly to the PodSpec of same. +dnsPolicy: +dnsConfig: + +# Extra manifests to deploy as an array +extraManifests: [] + # - apiVersion: v1 + # kind: ConfigMap + # metadata: + # labels: + # name: prometheus-extra + # data: + # extra-data: "value" + +# global common labels, applied to all ressources +commonLabels: {} + +# Enable vertical pod autoscaler support for prometheus-blackbox-exporter +verticalPodAutoscaler: + enabled: false + + # Recommender responsible for generating recommendation for the object. + # List should be empty (then the default recommender will generate the recommendation) + # or contain exactly one recommender. 
+ # recommenders: + # - name: custom-recommender-performance + + # List of resources that the vertical pod autoscaler can control. Defaults to cpu and memory + controlledResources: [] + # Specifies which resource values should be controlled: RequestsOnly or RequestsAndLimits. + # controlledValues: RequestsAndLimits + + # Define the max allowed resources for the pod + maxAllowed: {} + # cpu: 200m + # memory: 100Mi + # Define the min allowed resources for the pod + minAllowed: {} + # cpu: 200m + # memory: 100Mi + + updatePolicy: + # Specifies minimal number of replicas which need to be alive for VPA Updater to attempt pod eviction + # minReplicas: 1 + # Specifies whether recommended updates are applied when a Pod is started and whether recommended updates + # are applied during the life of a Pod. Possible values are "Off", "Initial", "Recreate", and "Auto". + updateMode: Auto diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/.helmignore b/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/.helmignore new file mode 100644 index 0000000..daebc7d --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/.helmignore @@ -0,0 +1,21 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/Chart.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/Chart.yaml new file mode 100644 index 0000000..d7cb3b9 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/Chart.yaml @@ -0,0 +1,25 @@ +apiVersion: v2 +name: prometheus-node-exporter +description: A Helm chart for prometheus node-exporter +keywords: + - node-exporter + - prometheus + - exporter +type: application +version: 4.24.0 +appVersion: 1.7.0 +home: https://github.com/prometheus/node_exporter/ +sources: + - https://github.com/prometheus/node_exporter/ +maintainers: + - email: gianrubio@gmail.com + name: gianrubio + - email: zanhsieh@gmail.com + name: zanhsieh + - email: rootsandtrees@posteo.de + name: zeritti +annotations: + "artifacthub.io/license": Apache-2.0 + "artifacthub.io/links": | + - name: Chart Source + url: https://github.com/prometheus-community/helm-charts diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/README.md b/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/README.md new file mode 100644 index 0000000..8124a2c --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/README.md @@ -0,0 +1,96 @@ +# Prometheus Node Exporter + +Prometheus exporter for hardware and OS metrics exposed by *NIX kernels, written in Go with pluggable metric collectors. + +This chart bootstraps a Prometheus [Node Exporter](http://github.com/prometheus/node_exporter) daemonset on a [Kubernetes](http://kubernetes.io) cluster using the [Helm](https://helm.sh) package manager. 
+ +## Get Repository Info + +```console +helm repo add prometheus-community https://prometheus-community.github.io/helm-charts +helm repo update +``` + +_See [helm repo](https://helm.sh/docs/helm/helm_repo/) for command documentation._ + +## Install Chart + +```console +helm install [RELEASE_NAME] prometheus-community/prometheus-node-exporter +``` + +_See [configuration](#configuring) below._ + +_See [helm install](https://helm.sh/docs/helm/helm_install/) for command documentation._ + +## Uninstall Chart + +```console +helm uninstall [RELEASE_NAME] +``` + +This removes all the Kubernetes components associated with the chart and deletes the release. + +_See [helm uninstall](https://helm.sh/docs/helm/helm_uninstall/) for command documentation._ + +## Upgrading Chart + +```console +helm upgrade [RELEASE_NAME] prometheus-community/prometheus-node-exporter --install +``` + +_See [helm upgrade](https://helm.sh/docs/helm/helm_upgrade/) for command documentation._ + +### 3.x to 4.x + +Starting from version 4.0.0, the `node exporter` chart is using the [Kubernetes recommended labels](https://kubernetes.io/docs/concepts/overview/working-with-objects/common-labels/). Therefore you have to delete the daemonset before you upgrade. + +```console +kubectl delete daemonset -l app=prometheus-node-exporter +helm upgrade -i prometheus-node-exporter prometheus-community/prometheus-node-exporter +``` + +If you use your own custom [ServiceMonitor](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#servicemonitor) or [PodMonitor](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#podmonitor), please ensure to upgrade their `selector` fields accordingly to the new labels. + +### From 2.x to 3.x + +Change the following: + +```yaml +hostRootFsMount: true +``` + +to: + +```yaml +hostRootFsMount: + enabled: true + mountPropagation: HostToContainer +``` + +## Configuring + +See [Customizing the Chart Before Installing](https://helm.sh/docs/intro/using_helm/#customizing-the-chart-before-installing). To see all configurable options with detailed comments, visit the chart's [values.yaml](./values.yaml), or run these configuration commands: + +```console +helm show values prometheus-community/prometheus-node-exporter +``` + +### kube-rbac-proxy + +You can enable `prometheus-node-exporter` endpoint protection using `kube-rbac-proxy`. By setting `kubeRBACProxy.enabled: true`, this chart will deploy a RBAC proxy container protecting the node-exporter endpoint. +To authorize access, authenticate your requests (via a `ServiceAccount` for example) with a `ClusterRole` attached such as: + +```yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: prometheus-node-exporter-read +rules: + - apiGroups: [ "" ] + resources: ["services/node-exporter-prometheus-node-exporter"] + verbs: + - get +``` + +See [kube-rbac-proxy examples](https://github.com/brancz/kube-rbac-proxy/tree/master/examples/resource-attributes) for more details. 
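(Editorial note on the kube-rbac-proxy section of the README above: it pairs with the `kubeRBACProxy.*` values consumed by the vendored daemonset template later in this patch. Below is a minimal values sketch for enabling it; `kubeRBACProxy.enabled` and `kubeRBACProxy.resources` are keys actually read by the templates in this chart, while the CPU and memory figures are illustrative placeholders, not chart defaults.)

```yaml
# Hedged sketch: enable the kube-rbac-proxy sidecar described in the README above.
# kubeRBACProxy.enabled and kubeRBACProxy.resources are referenced by the vendored
# daemonset template; the resource numbers below are illustrative only.
kubeRBACProxy:
  enabled: true
  resources:
    requests:
      cpu: 10m
      memory: 32Mi
    limits:
      cpu: 20m
      memory: 64Mi
```

With this enabled, the chart's NOTES.txt points out that the metrics endpoint is served over HTTPS, so scrape clients must authenticate (for example via a ServiceAccount) and have the ClusterRole shown in the README bound to them.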
diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/ci/port-values.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/ci/port-values.yaml new file mode 100644 index 0000000..d24fde5 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/ci/port-values.yaml @@ -0,0 +1,3 @@ +service: + targetPort: 9102 + port: 9102 diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/NOTES.txt b/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/NOTES.txt new file mode 100644 index 0000000..31b596d --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/NOTES.txt @@ -0,0 +1,29 @@ +1. Get the application URL by running these commands: +{{- if contains "NodePort" .Values.service.type }} + export NODE_PORT=$(kubectl get --namespace {{ template "prometheus-node-exporter.namespace" . }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ template "prometheus-node-exporter.fullname" . }}) + export NODE_IP=$(kubectl get nodes --namespace {{ template "prometheus-node-exporter.namespace" . }} -o jsonpath="{.items[0].status.addresses[0].address}") + echo http://$NODE_IP:$NODE_PORT +{{- else if contains "LoadBalancer" .Values.service.type }} + NOTE: It may take a few minutes for the LoadBalancer IP to be available. + You can watch the status of by running 'kubectl get svc -w {{ template "prometheus-node-exporter.fullname" . }}' + export SERVICE_IP=$(kubectl get svc --namespace {{ template "prometheus-node-exporter.namespace" . }} {{ template "prometheus-node-exporter.fullname" . }} -o jsonpath='{.status.loadBalancer.ingress[0].ip}') + echo http://$SERVICE_IP:{{ .Values.service.port }} +{{- else if contains "ClusterIP" .Values.service.type }} + export POD_NAME=$(kubectl get pods --namespace {{ template "prometheus-node-exporter.namespace" . }} -l "app.kubernetes.io/name={{ template "prometheus-node-exporter.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") + echo "Visit http://127.0.0.1:9100 to use your application" + kubectl port-forward --namespace {{ template "prometheus-node-exporter.namespace" . }} $POD_NAME 9100 +{{- end }} + +{{- if .Values.kubeRBACProxy.enabled}} + +kube-rbac-proxy endpoint protections is enabled: +- Metrics endpoints is now HTTPS +- Ensure that the client authenticates the requests (e.g. via service account) with the following role permissions: +``` +rules: + - apiGroups: [ "" ] + resources: ["services/{{ template "prometheus-node-exporter.fullname" . }}"] + verbs: + - get +``` +{{- end }} \ No newline at end of file diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/_helpers.tpl b/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/_helpers.tpl new file mode 100644 index 0000000..12e7834 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/_helpers.tpl @@ -0,0 +1,185 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. +*/}} +{{- define "prometheus-node-exporter.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. 
+*/}} +{{- define "prometheus-node-exporter.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "prometheus-node-exporter.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "prometheus-node-exporter.labels" -}} +helm.sh/chart: {{ include "prometheus-node-exporter.chart" . }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +app.kubernetes.io/component: metrics +app.kubernetes.io/part-of: {{ include "prometheus-node-exporter.name" . }} +{{ include "prometheus-node-exporter.selectorLabels" . }} +{{- with .Chart.AppVersion }} +app.kubernetes.io/version: {{ . | quote }} +{{- end }} +{{- with .Values.podLabels }} +{{ toYaml . }} +{{- end }} +{{- if .Values.releaseLabel }} +release: {{ .Release.Name }} +{{- end }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "prometheus-node-exporter.selectorLabels" -}} +app.kubernetes.io/name: {{ include "prometheus-node-exporter.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + + +{{/* +Create the name of the service account to use +*/}} +{{- define "prometheus-node-exporter.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "prometheus-node-exporter.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} + +{{/* +The image to use +*/}} +{{- define "prometheus-node-exporter.image" -}} +{{- if .Values.image.sha }} +{{- fail "image.sha forbidden. 
Use image.digest instead" }} +{{- else if .Values.image.digest }} +{{- if .Values.global.imageRegistry }} +{{- printf "%s/%s:%s@%s" .Values.global.imageRegistry .Values.image.repository (default (printf "v%s" .Chart.AppVersion) .Values.image.tag) .Values.image.digest }} +{{- else }} +{{- printf "%s/%s:%s@%s" .Values.image.registry .Values.image.repository (default (printf "v%s" .Chart.AppVersion) .Values.image.tag) .Values.image.digest }} +{{- end }} +{{- else }} +{{- if .Values.global.imageRegistry }} +{{- printf "%s/%s:%s" .Values.global.imageRegistry .Values.image.repository (default (printf "v%s" .Chart.AppVersion) .Values.image.tag) }} +{{- else }} +{{- printf "%s/%s:%s" .Values.image.registry .Values.image.repository (default (printf "v%s" .Chart.AppVersion) .Values.image.tag) }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Allow the release namespace to be overridden for multi-namespace deployments in combined charts +*/}} +{{- define "prometheus-node-exporter.namespace" -}} +{{- if .Values.namespaceOverride }} +{{- .Values.namespaceOverride }} +{{- else }} +{{- .Release.Namespace }} +{{- end }} +{{- end }} + +{{/* +Create the namespace name of the service monitor +*/}} +{{- define "prometheus-node-exporter.monitor-namespace" -}} +{{- if .Values.namespaceOverride }} +{{- .Values.namespaceOverride }} +{{- else }} +{{- if .Values.prometheus.monitor.namespace }} +{{- .Values.prometheus.monitor.namespace }} +{{- else }} +{{- .Release.Namespace }} +{{- end }} +{{- end }} +{{- end }} + +{{/* Sets default scrape limits for servicemonitor */}} +{{- define "servicemonitor.scrapeLimits" -}} +{{- with .sampleLimit }} +sampleLimit: {{ . }} +{{- end }} +{{- with .targetLimit }} +targetLimit: {{ . }} +{{- end }} +{{- with .labelLimit }} +labelLimit: {{ . }} +{{- end }} +{{- with .labelNameLengthLimit }} +labelNameLengthLimit: {{ . }} +{{- end }} +{{- with .labelValueLengthLimit }} +labelValueLengthLimit: {{ . }} +{{- end }} +{{- end }} + +{{/* +Formats imagePullSecrets. Input is (dict "Values" .Values "imagePullSecrets" .{specific imagePullSecrets}) +*/}} +{{- define "prometheus-node-exporter.imagePullSecrets" -}} +{{- range (concat .Values.global.imagePullSecrets .imagePullSecrets) }} + {{- if eq (typeOf .) "map[string]interface {}" }} +- {{ toYaml . | trim }} + {{- else }} +- name: {{ . }} + {{- end }} +{{- end }} +{{- end -}} + +{{/* +Create the namespace name of the pod monitor +*/}} +{{- define "prometheus-node-exporter.podmonitor-namespace" -}} +{{- if .Values.namespaceOverride }} +{{- .Values.namespaceOverride }} +{{- else }} +{{- if .Values.prometheus.podMonitor.namespace }} +{{- .Values.prometheus.podMonitor.namespace }} +{{- else }} +{{- .Release.Namespace }} +{{- end }} +{{- end }} +{{- end }} + +{{/* Sets default scrape limits for podmonitor */}} +{{- define "podmonitor.scrapeLimits" -}} +{{- with .sampleLimit }} +sampleLimit: {{ . }} +{{- end }} +{{- with .targetLimit }} +targetLimit: {{ . }} +{{- end }} +{{- with .labelLimit }} +labelLimit: {{ . }} +{{- end }} +{{- with .labelNameLengthLimit }} +labelNameLengthLimit: {{ . }} +{{- end }} +{{- with .labelValueLengthLimit }} +labelValueLengthLimit: {{ . 
}} +{{- end }} +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/clusterrole.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/clusterrole.yaml new file mode 100644 index 0000000..4c2b770 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/clusterrole.yaml @@ -0,0 +1,19 @@ +{{- if and (eq .Values.rbac.create true) (eq .Values.kubeRBACProxy.enabled true) -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ include "prometheus-node-exporter.fullname" . }} + labels: + {{- include "prometheus-node-exporter.labels" . | nindent 4 }} +rules: + {{- if $.Values.kubeRBACProxy.enabled }} + - apiGroups: [ "authentication.k8s.io" ] + resources: + - tokenreviews + verbs: [ "create" ] + - apiGroups: [ "authorization.k8s.io" ] + resources: + - subjectaccessreviews + verbs: [ "create" ] + {{- end }} +{{- end -}} diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/clusterrolebinding.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/clusterrolebinding.yaml new file mode 100644 index 0000000..a6e24a0 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/clusterrolebinding.yaml @@ -0,0 +1,20 @@ +{{- if and (eq .Values.rbac.create true) (eq .Values.kubeRBACProxy.enabled true) -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + labels: + {{- include "prometheus-node-exporter.labels" . | nindent 4 }} + name: {{ template "prometheus-node-exporter.fullname" . }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole +{{- if .Values.rbac.useExistingRole }} + name: {{ .Values.rbac.useExistingRole }} +{{- else }} + name: {{ template "prometheus-node-exporter.fullname" . }} +{{- end }} +subjects: +- kind: ServiceAccount + name: {{ template "prometheus-node-exporter.serviceAccountName" . }} + namespace: {{ template "prometheus-node-exporter.namespace" . }} +{{- end -}} diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/daemonset.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/daemonset.yaml new file mode 100644 index 0000000..bf7ae0a --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/daemonset.yaml @@ -0,0 +1,285 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: {{ include "prometheus-node-exporter.fullname" . }} + namespace: {{ include "prometheus-node-exporter.namespace" . }} + labels: + {{- include "prometheus-node-exporter.labels" . | nindent 4 }} + {{- with .Values.daemonsetAnnotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + selector: + matchLabels: + {{- include "prometheus-node-exporter.selectorLabels" . | nindent 6 }} + revisionHistoryLimit: {{ .Values.revisionHistoryLimit }} + {{- with .Values.updateStrategy }} + updateStrategy: + {{- toYaml . | nindent 4 }} + {{- end }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "prometheus-node-exporter.labels" . | nindent 8 }} + spec: + automountServiceAccountToken: {{ ternary true false (or .Values.serviceAccount.automountServiceAccountToken .Values.kubeRBACProxy.enabled) }} + {{- with .Values.securityContext }} + securityContext: + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- with .Values.priorityClassName }} + priorityClassName: {{ . }} + {{- end }} + {{- with .Values.extraInitContainers }} + initContainers: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "prometheus-node-exporter.serviceAccountName" . }} + containers: + {{- $servicePort := ternary 8100 .Values.service.port .Values.kubeRBACProxy.enabled }} + - name: node-exporter + image: {{ include "prometheus-node-exporter.image" . }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + args: + - --path.procfs=/host/proc + - --path.sysfs=/host/sys + {{- if .Values.hostRootFsMount.enabled }} + - --path.rootfs=/host/root + {{- if semverCompare ">=1.4.0" (default .Chart.AppVersion .Values.image.tag) }} + - --path.udev.data=/host/root/run/udev/data + {{- end }} + {{- end }} + - --web.listen-address=[$(HOST_IP)]:{{ $servicePort }} + {{- with .Values.extraArgs }} + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.containerSecurityContext }} + securityContext: + {{- toYaml . | nindent 12 }} + {{- end }} + env: + - name: HOST_IP + {{- if .Values.kubeRBACProxy.enabled }} + value: 127.0.0.1 + {{- else if .Values.service.listenOnAllInterfaces }} + value: 0.0.0.0 + {{- else }} + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.hostIP + {{- end }} + {{- range $key, $value := .Values.env }} + - name: {{ $key }} + value: {{ $value | quote }} + {{- end }} + {{- if eq .Values.kubeRBACProxy.enabled false }} + ports: + - name: {{ .Values.service.portName }} + containerPort: {{ .Values.service.port }} + protocol: TCP + {{- end }} + livenessProbe: + failureThreshold: {{ .Values.livenessProbe.failureThreshold }} + httpGet: + {{- if .Values.kubeRBACProxy.enabled }} + host: 127.0.0.1 + {{- end }} + httpHeaders: + {{- range $_, $header := .Values.livenessProbe.httpGet.httpHeaders }} + - name: {{ $header.name }} + value: {{ $header.value }} + {{- end }} + path: / + port: {{ $servicePort }} + scheme: {{ upper .Values.livenessProbe.httpGet.scheme }} + initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.livenessProbe.periodSeconds }} + successThreshold: {{ .Values.livenessProbe.successThreshold }} + timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }} + readinessProbe: + failureThreshold: {{ .Values.readinessProbe.failureThreshold }} + httpGet: + {{- if .Values.kubeRBACProxy.enabled }} + host: 127.0.0.1 + {{- end }} + httpHeaders: + {{- range $_, $header := .Values.readinessProbe.httpGet.httpHeaders }} + - name: {{ $header.name }} + value: {{ $header.value }} + {{- end }} + path: / + port: {{ $servicePort }} + scheme: {{ upper .Values.readinessProbe.httpGet.scheme }} + initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.readinessProbe.periodSeconds }} + successThreshold: {{ .Values.readinessProbe.successThreshold }} + timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }} + {{- with .Values.resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + volumeMounts: + - name: proc + mountPath: /host/proc + readOnly: true + - name: sys + mountPath: /host/sys + readOnly: true + {{- if .Values.hostRootFsMount.enabled }} + - name: root + mountPath: /host/root + {{- with .Values.hostRootFsMount.mountPropagation }} + mountPropagation: {{ . 
}} + {{- end }} + readOnly: true + {{- end }} + {{- range $_, $mount := .Values.extraHostVolumeMounts }} + - name: {{ $mount.name }} + mountPath: {{ $mount.mountPath }} + readOnly: {{ $mount.readOnly }} + {{- with $mount.mountPropagation }} + mountPropagation: {{ . }} + {{- end }} + {{- end }} + {{- range $_, $mount := .Values.sidecarVolumeMount }} + - name: {{ $mount.name }} + mountPath: {{ $mount.mountPath }} + readOnly: true + {{- end }} + {{- range $_, $mount := .Values.configmaps }} + - name: {{ $mount.name }} + mountPath: {{ $mount.mountPath }} + {{- end }} + {{- range $_, $mount := .Values.secrets }} + - name: {{ .name }} + mountPath: {{ .mountPath }} + {{- end }} + {{- with .Values.sidecars }} + {{- toYaml . | nindent 8 }} + {{- if or $.Values.sidecarVolumeMount $.Values.sidecarHostVolumeMounts }} + volumeMounts: + {{- range $_, $mount := $.Values.sidecarVolumeMount }} + - name: {{ $mount.name }} + mountPath: {{ $mount.mountPath }} + readOnly: {{ $mount.readOnly }} + {{- end }} + {{- range $_, $mount := $.Values.sidecarHostVolumeMounts }} + - name: {{ $mount.name }} + mountPath: {{ $mount.mountPath }} + readOnly: {{ $mount.readOnly }} + {{- if $mount.mountPropagation }} + mountPropagation: {{ $mount.mountPropagation }} + {{- end }} + {{- end }} + {{- end }} + {{- end }} + {{- if .Values.kubeRBACProxy.enabled }} + - name: kube-rbac-proxy + args: + {{- if .Values.kubeRBACProxy.extraArgs }} + {{- .Values.kubeRBACProxy.extraArgs | toYaml | nindent 12 }} + {{- end }} + - --secure-listen-address=:{{ .Values.service.port}} + - --upstream=http://127.0.0.1:{{ $servicePort }}/ + - --proxy-endpoints-port=8888 + - --config-file=/etc/kube-rbac-proxy-config/config-file.yaml + volumeMounts: + - name: kube-rbac-proxy-config + mountPath: /etc/kube-rbac-proxy-config + imagePullPolicy: {{ .Values.kubeRBACProxy.image.pullPolicy }} + {{- if .Values.kubeRBACProxy.image.sha }} + image: "{{ .Values.global.imageRegistry | default .Values.kubeRBACProxy.image.registry}}/{{ .Values.kubeRBACProxy.image.repository }}:{{ .Values.kubeRBACProxy.image.tag }}@sha256:{{ .Values.kubeRBACProxy.image.sha }}" + {{- else }} + image: "{{ .Values.global.imageRegistry | default .Values.kubeRBACProxy.image.registry}}/{{ .Values.kubeRBACProxy.image.repository }}:{{ .Values.kubeRBACProxy.image.tag }}" + {{- end }} + ports: + - containerPort: {{ .Values.service.port}} + name: "http" + - containerPort: 8888 + name: "http-healthz" + readinessProbe: + httpGet: + scheme: HTTPS + port: 8888 + path: healthz + initialDelaySeconds: 5 + timeoutSeconds: 5 + {{- if .Values.kubeRBACProxy.resources }} + resources: + {{ toYaml .Values.kubeRBACProxy.resources | nindent 12 }} + {{- end }} + {{- if .Values.kubeRBACProxy.containerSecurityContext }} + securityContext: + {{ toYaml .Values.kubeRBACProxy.containerSecurityContext | nindent 12 }} + {{- end }} + {{- end }} + {{- if or .Values.imagePullSecrets .Values.global.imagePullSecrets }} + imagePullSecrets: + {{- include "prometheus-node-exporter.imagePullSecrets" (dict "Values" .Values "imagePullSecrets" .Values.imagePullSecrets) | indent 8 }} + {{- end }} + hostNetwork: {{ .Values.hostNetwork }} + hostPID: {{ .Values.hostPID }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.dnsConfig }} + dnsConfig: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . 
| nindent 8 }} + {{- end }} + volumes: + - name: proc + hostPath: + path: /proc + - name: sys + hostPath: + path: /sys + {{- if .Values.hostRootFsMount.enabled }} + - name: root + hostPath: + path: / + {{- end }} + {{- range $_, $mount := .Values.extraHostVolumeMounts }} + - name: {{ $mount.name }} + hostPath: + path: {{ $mount.hostPath }} + {{- end }} + {{- range $_, $mount := .Values.sidecarVolumeMount }} + - name: {{ $mount.name }} + emptyDir: + medium: Memory + {{- end }} + {{- range $_, $mount := .Values.sidecarHostVolumeMounts }} + - name: {{ $mount.name }} + hostPath: + path: {{ $mount.hostPath }} + {{- end }} + {{- range $_, $mount := .Values.configmaps }} + - name: {{ $mount.name }} + configMap: + name: {{ $mount.name }} + {{- end }} + {{- range $_, $mount := .Values.secrets }} + - name: {{ $mount.name }} + secret: + secretName: {{ $mount.name }} + {{- end }} + {{- if .Values.kubeRBACProxy.enabled }} + - name: kube-rbac-proxy-config + configMap: + name: {{ template "prometheus-node-exporter.fullname" . }}-rbac-config + {{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/endpoints.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/endpoints.yaml new file mode 100644 index 0000000..805a25a --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/endpoints.yaml @@ -0,0 +1,18 @@ +{{- if .Values.endpoints }} +apiVersion: v1 +kind: Endpoints +metadata: + name: {{ include "prometheus-node-exporter.fullname" . }} + namespace: {{ include "prometheus-node-exporter.namespace" . }} + labels: + {{- include "prometheus-node-exporter.labels" . | nindent 4 }} +subsets: + - addresses: + {{- range .Values.endpoints }} + - ip: {{ . }} + {{- end }} + ports: + - name: {{ .Values.service.portName }} + port: 9100 + protocol: TCP +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/extra-manifests.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/extra-manifests.yaml new file mode 100644 index 0000000..1fa3a3c --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/extra-manifests.yaml @@ -0,0 +1,4 @@ +{{ range .Values.extraManifests }} +--- +{{ tpl . $ }} +{{ end }} diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/networkpolicy.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/networkpolicy.yaml new file mode 100644 index 0000000..5cd87b0 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/networkpolicy.yaml @@ -0,0 +1,23 @@ +{{- if .Values.networkPolicy.enabled }} +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: {{ include "prometheus-node-exporter.fullname" . }} + namespace: {{ include "prometheus-node-exporter.namespace" . }} + labels: + {{- include "prometheus-node-exporter.labels" $ | nindent 4 }} + {{- with .Values.service.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + ingress: + - ports: + - port: {{ .Values.service.port }} + policyTypes: + - Egress + - Ingress + podSelector: + matchLabels: + {{- include "prometheus-node-exporter.selectorLabels" . 
| nindent 6 }} +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/podmonitor.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/podmonitor.yaml new file mode 100644 index 0000000..cb56292 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/podmonitor.yaml @@ -0,0 +1,91 @@ +{{- if .Values.prometheus.podMonitor.enabled }} +apiVersion: {{ .Values.prometheus.podMonitor.apiVersion | default "monitoring.coreos.com/v1" }} +kind: PodMonitor +metadata: + name: {{ include "prometheus-node-exporter.fullname" . }} + namespace: {{ include "prometheus-node-exporter.podmonitor-namespace" . }} + labels: + {{- include "prometheus-node-exporter.labels" . | nindent 4 }} + {{- with .Values.prometheus.podMonitor.additionalLabels }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + jobLabel: {{ default "app.kubernetes.io/name" .Values.prometheus.podMonitor.jobLabel }} + {{- include "podmonitor.scrapeLimits" .Values.prometheus.podMonitor | nindent 2 }} + selector: + matchLabels: + {{- with .Values.prometheus.podMonitor.selectorOverride }} + {{- toYaml . | nindent 6 }} + {{- else }} + {{- include "prometheus-node-exporter.selectorLabels" . | nindent 6 }} + {{- end }} + namespaceSelector: + matchNames: + - {{ include "prometheus-node-exporter.namespace" . }} + {{- with .Values.prometheus.podMonitor.attachMetadata }} + attachMetadata: + {{- toYaml . | nindent 4 }} + {{- end }} + {{- with .Values.prometheus.podMonitor.podTargetLabels }} + podTargetLabels: + {{- toYaml . | nindent 4 }} + {{- end }} + podMetricsEndpoints: + - port: {{ .Values.service.portName }} + {{- with .Values.prometheus.podMonitor.scheme }} + scheme: {{ . }} + {{- end }} + {{- with .Values.prometheus.podMonitor.path }} + path: {{ . }} + {{- end }} + {{- with .Values.prometheus.podMonitor.basicAuth }} + basicAuth: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.prometheus.podMonitor.bearerTokenSecret }} + bearerTokenSecret: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.prometheus.podMonitor.tlsConfig }} + tlsConfig: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.prometheus.podMonitor.authorization }} + authorization: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.prometheus.podMonitor.oauth2 }} + oauth2: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.prometheus.podMonitor.proxyUrl }} + proxyUrl: {{ . }} + {{- end }} + {{- with .Values.prometheus.podMonitor.interval }} + interval: {{ . }} + {{- end }} + {{- with .Values.prometheus.podMonitor.honorTimestamps }} + honorTimestamps: {{ . }} + {{- end }} + {{- with .Values.prometheus.podMonitor.honorLabels }} + honorLabels: {{ . }} + {{- end }} + {{- with .Values.prometheus.podMonitor.scrapeTimeout }} + scrapeTimeout: {{ . }} + {{- end }} + {{- with .Values.prometheus.podMonitor.relabelings }} + relabelings: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.prometheus.podMonitor.metricRelabelings }} + metricRelabelings: + {{- toYaml . | nindent 8 }} + {{- end }} + enableHttp2: {{ default false .Values.prometheus.podMonitor.enableHttp2 }} + filterRunning: {{ default true .Values.prometheus.podMonitor.filterRunning }} + followRedirects: {{ default false .Values.prometheus.podMonitor.followRedirects }} + {{- with .Values.prometheus.podMonitor.params }} + params: + {{- toYaml . 
| nindent 8 }} + {{- end }} +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/psp-clusterrole.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/psp-clusterrole.yaml new file mode 100644 index 0000000..9124f08 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/psp-clusterrole.yaml @@ -0,0 +1,14 @@ +{{- if and .Values.rbac.create .Values.rbac.pspEnabled (.Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy") }} +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: psp-{{ include "prometheus-node-exporter.fullname" . }} + labels: + {{- include "prometheus-node-exporter.labels" . | nindent 4 }} +rules: +- apiGroups: ['extensions'] + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ include "prometheus-node-exporter.fullname" . }} +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/psp-clusterrolebinding.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/psp-clusterrolebinding.yaml new file mode 100644 index 0000000..7a0b078 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/psp-clusterrolebinding.yaml @@ -0,0 +1,16 @@ +{{- if and .Values.rbac.create .Values.rbac.pspEnabled (.Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy") }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: psp-{{ include "prometheus-node-exporter.fullname" . }} + labels: + {{- include "prometheus-node-exporter.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: psp-{{ include "prometheus-node-exporter.fullname" . }} +subjects: + - kind: ServiceAccount + name: {{ include "prometheus-node-exporter.fullname" . }} + namespace: {{ include "prometheus-node-exporter.namespace" . }} +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/psp.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/psp.yaml new file mode 100644 index 0000000..be180fe --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/psp.yaml @@ -0,0 +1,49 @@ +{{- if and .Values.rbac.create .Values.rbac.pspEnabled (.Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy") }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ include "prometheus-node-exporter.fullname" . }} + namespace: {{ include "prometheus-node-exporter.namespace" . }} + labels: + {{- include "prometheus-node-exporter.labels" . | nindent 4 }} + {{- with .Values.rbac.pspAnnotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + privileged: false + # Allow core volume types. + volumes: + - 'configMap' + - 'emptyDir' + - 'projected' + - 'secret' + - 'downwardAPI' + - 'persistentVolumeClaim' + - 'hostPath' + hostNetwork: true + hostIPC: false + hostPID: true + hostPorts: + - min: 0 + max: 65535 + runAsUser: + # Permits the container to run with root privileges as well. + rule: 'RunAsAny' + seLinux: + # This policy assumes the nodes are using AppArmor rather than SELinux. + rule: 'RunAsAny' + supplementalGroups: + rule: 'MustRunAs' + ranges: + # Allow adding the root group. + - min: 0 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + # Allow adding the root group. 
+ - min: 0 + max: 65535 + readOnlyRootFilesystem: false +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/rbac-configmap.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/rbac-configmap.yaml new file mode 100644 index 0000000..14419e3 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/rbac-configmap.yaml @@ -0,0 +1,16 @@ +{{- if .Values.kubeRBACProxy.enabled}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "prometheus-node-exporter.fullname" . }}-rbac-config + namespace: {{ include "prometheus-node-exporter.namespace" . }} +data: + config-file.yaml: |+ + authorization: + resourceAttributes: + namespace: {{ template "prometheus-node-exporter.namespace" . }} + apiVersion: v1 + resource: services + subresource: {{ template "prometheus-node-exporter.fullname" . }} + name: {{ template "prometheus-node-exporter.fullname" . }} +{{- end }} \ No newline at end of file diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/service.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/service.yaml new file mode 100644 index 0000000..da2b6da --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/service.yaml @@ -0,0 +1,29 @@ +{{- if .Values.service.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "prometheus-node-exporter.fullname" . }} + namespace: {{ include "prometheus-node-exporter.namespace" . }} + labels: + {{- include "prometheus-node-exporter.labels" $ | nindent 4 }} + {{- with .Values.service.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: +{{- if .Values.service.ipDualStack.enabled }} + ipFamilies: {{ toYaml .Values.service.ipDualStack.ipFamilies | nindent 4 }} + ipFamilyPolicy: {{ .Values.service.ipDualStack.ipFamilyPolicy }} +{{- end }} + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + {{- if ( and (eq .Values.service.type "NodePort" ) (not (empty .Values.service.nodePort)) ) }} + nodePort: {{ .Values.service.nodePort }} + {{- end }} + targetPort: {{ .Values.service.targetPort }} + protocol: TCP + name: {{ .Values.service.portName }} + selector: + {{- include "prometheus-node-exporter.selectorLabels" . | nindent 4 }} +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/serviceaccount.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/serviceaccount.yaml new file mode 100644 index 0000000..e0a35bf --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/serviceaccount.yaml @@ -0,0 +1,17 @@ +{{- if and .Values.rbac.create .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "prometheus-node-exporter.serviceAccountName" . }} + namespace: {{ include "prometheus-node-exporter.namespace" . }} + labels: + {{- include "prometheus-node-exporter.labels" . | nindent 4 }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . 
| nindent 4 }} + {{- end }} +{{- if or .Values.serviceAccount.imagePullSecrets .Values.global.imagePullSecrets }} +imagePullSecrets: + {{- include "prometheus-node-exporter.imagePullSecrets" (dict "Values" .Values "imagePullSecrets" .Values.serviceAccount.imagePullSecrets) | indent 2 }} +{{- end }} +{{- end -}} diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/servicemonitor.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/servicemonitor.yaml new file mode 100644 index 0000000..6f53a58 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/servicemonitor.yaml @@ -0,0 +1,61 @@ +{{- if .Values.prometheus.monitor.enabled }} +apiVersion: {{ .Values.prometheus.monitor.apiVersion | default "monitoring.coreos.com/v1" }} +kind: ServiceMonitor +metadata: + name: {{ include "prometheus-node-exporter.fullname" . }} + namespace: {{ include "prometheus-node-exporter.monitor-namespace" . }} + labels: + {{- include "prometheus-node-exporter.labels" . | nindent 4 }} + {{- with .Values.prometheus.monitor.additionalLabels }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + jobLabel: {{ default "app.kubernetes.io/name" .Values.prometheus.monitor.jobLabel }} + {{- include "servicemonitor.scrapeLimits" .Values.prometheus.monitor | nindent 2 }} + {{- with .Values.prometheus.monitor.podTargetLabels }} + podTargetLabels: + {{- toYaml . | nindent 4 }} + {{- end }} + selector: + matchLabels: + {{- with .Values.prometheus.monitor.selectorOverride }} + {{- toYaml . | nindent 6 }} + {{- else }} + {{- include "prometheus-node-exporter.selectorLabels" . | nindent 6 }} + {{- end }} + {{- with .Values.prometheus.monitor.attachMetadata }} + attachMetadata: + {{- toYaml . | nindent 4 }} + {{- end }} + endpoints: + - port: {{ .Values.service.portName }} + scheme: {{ .Values.prometheus.monitor.scheme }} + {{- with .Values.prometheus.monitor.basicAuth }} + basicAuth: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.prometheus.monitor.bearerTokenFile }} + bearerTokenFile: {{ . }} + {{- end }} + {{- with .Values.prometheus.monitor.tlsConfig }} + tlsConfig: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.prometheus.monitor.proxyUrl }} + proxyUrl: {{ . }} + {{- end }} + {{- with .Values.prometheus.monitor.interval }} + interval: {{ . }} + {{- end }} + {{- with .Values.prometheus.monitor.scrapeTimeout }} + scrapeTimeout: {{ . }} + {{- end }} + {{- with .Values.prometheus.monitor.relabelings }} + relabelings: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.prometheus.monitor.metricRelabelings }} + metricRelabelings: + {{- toYaml . | nindent 8 }} + {{- end }} +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/verticalpodautoscaler.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/verticalpodautoscaler.yaml new file mode 100644 index 0000000..786ded5 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/templates/verticalpodautoscaler.yaml @@ -0,0 +1,40 @@ +{{- if and (.Capabilities.APIVersions.Has "autoscaling.k8s.io/v1") (.Values.verticalPodAutoscaler.enabled) }} +apiVersion: autoscaling.k8s.io/v1 +kind: VerticalPodAutoscaler +metadata: + name: {{ include "prometheus-node-exporter.fullname" . }} + namespace: {{ include "prometheus-node-exporter.namespace" . }} + labels: + {{- include "prometheus-node-exporter.labels" . 
| nindent 4 }} +spec: + {{- with .Values.verticalPodAutoscaler.recommenders }} + recommenders: + {{- toYaml . | nindent 4 }} + {{- end }} + resourcePolicy: + containerPolicies: + - containerName: node-exporter + {{- with .Values.verticalPodAutoscaler.controlledResources }} + controlledResources: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.verticalPodAutoscaler.controlledValues }} + controlledValues: {{ . }} + {{- end }} + {{- with .Values.verticalPodAutoscaler.maxAllowed }} + maxAllowed: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.verticalPodAutoscaler.minAllowed }} + minAllowed: + {{- toYaml . | nindent 8 }} + {{- end }} + targetRef: + apiVersion: apps/v1 + kind: DaemonSet + name: {{ include "prometheus-node-exporter.fullname" . }} + {{- with .Values.verticalPodAutoscaler.updatePolicy }} + updatePolicy: + {{- toYaml . | nindent 4 }} + {{- end }} +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/values.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/values.yaml new file mode 100644 index 0000000..ac865fc --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-node-exporter/values.yaml @@ -0,0 +1,481 @@ +# Default values for prometheus-node-exporter. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. +image: + registry: quay.io + repository: prometheus/node-exporter + # Overrides the image tag whose default is {{ printf "v%s" .Chart.AppVersion }} + tag: "" + pullPolicy: IfNotPresent + digest: "" + +imagePullSecrets: [] +# - name: "image-pull-secret" +nameOverride: "" +fullnameOverride: "" + +# Number of old history to retain to allow rollback +# Default Kubernetes value is set to 10 +revisionHistoryLimit: 10 + +global: + # To help compatibility with other charts which use global.imagePullSecrets. + # Allow either an array of {name: pullSecret} maps (k8s-style), or an array of strings (more common helm-style). + # global: + # imagePullSecrets: + # - name: pullSecret1 + # - name: pullSecret2 + # or + # global: + # imagePullSecrets: + # - pullSecret1 + # - pullSecret2 + imagePullSecrets: [] + # + # Allow parent charts to override registry hostname + imageRegistry: "" + +# Configure kube-rbac-proxy. When enabled, creates a kube-rbac-proxy to protect the node-exporter http endpoint. +# The requests are served through the same service but requests are HTTPS. +kubeRBACProxy: + enabled: false + image: + registry: quay.io + repository: brancz/kube-rbac-proxy + tag: v0.15.0 + sha: "" + pullPolicy: IfNotPresent + + # List of additional cli arguments to configure kube-rbac-prxy + # for example: --tls-cipher-suites, --log-file, etc. + # all the possible args can be found here: https://github.com/brancz/kube-rbac-proxy#usage + extraArgs: [] + + ## Specify security settings for a Container + ## Allows overrides and additional options compared to (Pod) securityContext + ## Ref: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container + containerSecurityContext: {} + + resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
+ # limits: + # cpu: 100m + # memory: 64Mi + # requests: + # cpu: 10m + # memory: 32Mi + +service: + enabled: true + type: ClusterIP + port: 9100 + targetPort: 9100 + nodePort: + portName: metrics + listenOnAllInterfaces: true + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "9100" + ipDualStack: + enabled: false + ipFamilies: ["IPv6", "IPv4"] + ipFamilyPolicy: "PreferDualStack" + +# Set a NetworkPolicy with: +# ingress only on service.port +# no egress permitted +networkPolicy: + enabled: false + +# Additional environment variables that will be passed to the daemonset +env: {} +## env: +## VARIABLE: value + +prometheus: + monitor: + enabled: false + additionalLabels: {} + namespace: "" + + jobLabel: "" + + # List of pod labels to add to node exporter metrics + # https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#servicemonitor + podTargetLabels: [] + + scheme: http + basicAuth: {} + bearerTokenFile: + tlsConfig: {} + + ## proxyUrl: URL of a proxy that should be used for scraping. + ## + proxyUrl: "" + + ## Override serviceMonitor selector + ## + selectorOverride: {} + + ## Attach node metadata to discovered targets. Requires Prometheus v2.35.0 and above. + ## + attachMetadata: + node: false + + relabelings: [] + metricRelabelings: [] + interval: "" + scrapeTimeout: 10s + ## prometheus.monitor.apiVersion ApiVersion for the serviceMonitor Resource(defaults to "monitoring.coreos.com/v1") + apiVersion: "" + + ## SampleLimit defines per-scrape limit on number of scraped samples that will be accepted. + ## + sampleLimit: 0 + + ## TargetLimit defines a limit on the number of scraped targets that will be accepted. + ## + targetLimit: 0 + + ## Per-scrape limit on number of labels that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelLimit: 0 + + ## Per-scrape limit on length of labels name that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelNameLengthLimit: 0 + + ## Per-scrape limit on length of labels value that will be accepted for a sample. Only valid in Prometheus versions 2.27.0 and newer. + ## + labelValueLengthLimit: 0 + + # PodMonitor defines monitoring for a set of pods. + # ref. https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#monitoring.coreos.com/v1.PodMonitor + # Using a PodMonitor may be preferred in some environments where there is very large number + # of Node Exporter endpoints (1000+) behind a single service. + # The PodMonitor is disabled by default. When switching from ServiceMonitor to PodMonitor, + # the time series resulting from the configuration through PodMonitor may have different labels. + # For instance, there will not be the service label any longer which might + # affect PromQL queries selecting that label. + podMonitor: + enabled: false + # Namespace in which to deploy the pod monitor. Defaults to the release namespace. + namespace: "" + # Additional labels, e.g. setting a label for pod monitor selector as set in prometheus + additionalLabels: {} + # release: kube-prometheus-stack + # PodTargetLabels transfers labels of the Kubernetes Pod onto the target. + podTargetLabels: [] + # apiVersion defaults to monitoring.coreos.com/v1. + apiVersion: "" + # Override pod selector to select pod objects. + selectorOverride: {} + # Attach node metadata to discovered targets. Requires Prometheus v2.35.0 and above. 
+ attachMetadata: + node: false + # The label to use to retrieve the job name from. Defaults to label app.kubernetes.io/name. + jobLabel: "" + + # Scheme/protocol to use for scraping. + scheme: "http" + # Path to scrape metrics at. + path: "/metrics" + + # BasicAuth allow an endpoint to authenticate over basic authentication. + # More info: https://prometheus.io/docs/operating/configuration/#endpoint + basicAuth: {} + # Secret to mount to read bearer token for scraping targets. + # The secret needs to be in the same namespace as the pod monitor and accessible by the Prometheus Operator. + # https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#secretkeyselector-v1-core + bearerTokenSecret: {} + # TLS configuration to use when scraping the endpoint. + tlsConfig: {} + # Authorization section for this endpoint. + # https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#monitoring.coreos.com/v1.SafeAuthorization + authorization: {} + # OAuth2 for the URL. Only valid in Prometheus versions 2.27.0 and newer. + # https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#monitoring.coreos.com/v1.OAuth2 + oauth2: {} + + # ProxyURL eg http://proxyserver:2195. Directs scrapes through proxy to this endpoint. + proxyUrl: "" + # Interval at which endpoints should be scraped. If not specified Prometheus’ global scrape interval is used. + interval: "" + # Timeout after which the scrape is ended. If not specified, the Prometheus global scrape interval is used. + scrapeTimeout: "" + # HonorTimestamps controls whether Prometheus respects the timestamps present in scraped data. + honorTimestamps: true + # HonorLabels chooses the metric’s labels on collisions with target labels. + honorLabels: true + # Whether to enable HTTP2. Default false. + enableHttp2: "" + # Drop pods that are not running. (Failed, Succeeded). + # Enabled by default. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-phase + filterRunning: "" + # FollowRedirects configures whether scrape requests follow HTTP 3xx redirects. Default false. + followRedirects: "" + # Optional HTTP URL parameters + params: {} + + # RelabelConfigs to apply to samples before scraping. Prometheus Operator automatically adds + # relabelings for a few standard Kubernetes fields. The original scrape job’s name + # is available via the __tmp_prometheus_job_name label. + # More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config + relabelings: [] + # MetricRelabelConfigs to apply to samples before ingestion. + metricRelabelings: [] + + # SampleLimit defines per-scrape limit on number of scraped samples that will be accepted. + sampleLimit: 0 + # TargetLimit defines a limit on the number of scraped targets that will be accepted. + targetLimit: 0 + # Per-scrape limit on number of labels that will be accepted for a sample. + # Only valid in Prometheus versions 2.27.0 and newer. + labelLimit: 0 + # Per-scrape limit on length of labels name that will be accepted for a sample. + # Only valid in Prometheus versions 2.27.0 and newer. + labelNameLengthLimit: 0 + # Per-scrape limit on length of labels value that will be accepted for a sample. + # Only valid in Prometheus versions 2.27.0 and newer. 
+ labelValueLengthLimit: 0 + +## Customize the updateStrategy if set +updateStrategy: + type: RollingUpdate + rollingUpdate: + maxUnavailable: 1 + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. + # limits: + # cpu: 200m + # memory: 50Mi + # requests: + # cpu: 100m + # memory: 30Mi + +serviceAccount: + # Specifies whether a ServiceAccount should be created + create: true + # The name of the ServiceAccount to use. + # If not set and create is true, a name is generated using the fullname template + name: + annotations: {} + imagePullSecrets: [] + automountServiceAccountToken: false + +securityContext: + fsGroup: 65534 + runAsGroup: 65534 + runAsNonRoot: true + runAsUser: 65534 + +containerSecurityContext: + readOnlyRootFilesystem: true + # capabilities: + # add: + # - SYS_TIME + +rbac: + ## If true, create & use RBAC resources + ## + create: true + ## If true, create & use Pod Security Policy resources + ## https://kubernetes.io/docs/concepts/policy/pod-security-policy/ + pspEnabled: true + pspAnnotations: {} + +# for deployments that have node_exporter deployed outside of the cluster, list +# their addresses here +endpoints: [] + +# Expose the service to the host network +hostNetwork: true + +# Share the host process ID namespace +hostPID: true + +# Mount the node's root file system (/) at /host/root in the container +hostRootFsMount: + enabled: true + # Defines how new mounts in existing mounts on the node or in the container + # are propagated to the container or node, respectively. Possible values are + # None, HostToContainer, and Bidirectional. If this field is omitted, then + # None is used. 
More information on: + # https://kubernetes.io/docs/concepts/storage/volumes/#mount-propagation + mountPropagation: HostToContainer + +## Assign a group of affinity scheduling rules +## +affinity: {} +# nodeAffinity: +# requiredDuringSchedulingIgnoredDuringExecution: +# nodeSelectorTerms: +# - matchFields: +# - key: metadata.name +# operator: In +# values: +# - target-host-name + +# Annotations to be added to node exporter pods +podAnnotations: + # Fix for very slow GKE cluster upgrades + cluster-autoscaler.kubernetes.io/safe-to-evict: "true" + +# Extra labels to be added to node exporter pods +podLabels: {} + +# Annotations to be added to node exporter daemonset +daemonsetAnnotations: {} + +## set to true to add the release label so scraping of the servicemonitor with kube-prometheus-stack works out of the box +releaseLabel: false + +# Custom DNS configuration to be added to prometheus-node-exporter pods +dnsConfig: {} +# nameservers: +# - 1.2.3.4 +# searches: +# - ns1.svc.cluster-domain.example +# - my.dns.search.suffix +# options: +# - name: ndots +# value: "2" +# - name: edns0 + +## Assign a nodeSelector if operating a hybrid cluster +## +nodeSelector: + kubernetes.io/os: linux + # kubernetes.io/arch: amd64 + +tolerations: + - effect: NoSchedule + operator: Exists + +## Assign a PriorityClassName to pods if set +# priorityClassName: "" + +## Additional container arguments +## +extraArgs: [] +# - --collector.diskstats.ignored-devices=^(ram|loop|fd|(h|s|v)d[a-z]|nvme\\d+n\\d+p)\\d+$ +# - --collector.textfile.directory=/run/prometheus + +## Additional mounts from the host to node-exporter container +## +extraHostVolumeMounts: [] +# - name: +# hostPath: +# mountPath: +# readOnly: true|false +# mountPropagation: None|HostToContainer|Bidirectional + +## Additional configmaps to be mounted. +## +configmaps: [] +# - name: +# mountPath: +secrets: [] +# - name: +# mountPath: +## Override the deployment namespace +## +namespaceOverride: "" + +## Additional containers for export metrics to text file +## +sidecars: [] +## - name: nvidia-dcgm-exporter +## image: nvidia/dcgm-exporter:1.4.3 + +## Volume for sidecar containers +## +sidecarVolumeMount: [] +## - name: collector-textfiles +## mountPath: /run/prometheus +## readOnly: false + +## Additional mounts from the host to sidecar containers +## +sidecarHostVolumeMounts: [] +# - name: +# hostPath: +# mountPath: +# readOnly: true|false +# mountPropagation: None|HostToContainer|Bidirectional + +## Additional InitContainers to initialize the pod +## +extraInitContainers: [] + +## Liveness probe +## +livenessProbe: + failureThreshold: 3 + httpGet: + httpHeaders: [] + scheme: http + initialDelaySeconds: 0 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 1 + +## Readiness probe +## +readinessProbe: + failureThreshold: 3 + httpGet: + httpHeaders: [] + scheme: http + initialDelaySeconds: 0 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 1 + +# Enable vertical pod autoscaler support for prometheus-node-exporter +verticalPodAutoscaler: + enabled: false + + # Recommender responsible for generating recommendation for the object. + # List should be empty (then the default recommender will generate the recommendation) + # or contain exactly one recommender. + # recommenders: + # - name: custom-recommender-performance + + # List of resources that the vertical pod autoscaler can control. Defaults to cpu and memory + controlledResources: [] + # Specifies which resource values should be controlled: RequestsOnly or RequestsAndLimits. 
+ # controlledValues: RequestsAndLimits + + # Define the max allowed resources for the pod + maxAllowed: {} + # cpu: 200m + # memory: 100Mi + # Define the min allowed resources for the pod + minAllowed: {} + # cpu: 200m + # memory: 100Mi + + # updatePolicy: + # Specifies minimal number of replicas which need to be alive for VPA Updater to attempt pod eviction + # minReplicas: 1 + # Specifies whether recommended updates are applied when a Pod is started and whether recommended updates + # are applied during the life of a Pod. Possible values are "Off", "Initial", "Recreate", and "Auto". + # updateMode: Auto + +# Extra manifests to deploy as an array +extraManifests: [] + # - | + # apiVersion: v1 + # kind: ConfigMap + # metadata: + # name: prometheus-extra + # data: + # extra-data: "value" diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/.helmignore b/sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/.helmignore new file mode 100644 index 0000000..62ec87a --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/.helmignore @@ -0,0 +1,24 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj + +# OWNERS file for Kubernetes +OWNERS \ No newline at end of file diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/Chart.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/Chart.yaml new file mode 100644 index 0000000..5b32a60 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/Chart.yaml @@ -0,0 +1,24 @@ +apiVersion: v2 +appVersion: "v1.6.2" +description: A Helm chart for prometheus pushgateway +name: prometheus-pushgateway +version: 2.4.2 +home: https://github.com/prometheus/pushgateway +sources: + - https://github.com/prometheus/pushgateway +type: application +keywords: + - pushgateway + - prometheus +maintainers: + - email: gianrubio@gmail.com + name: gianrubio + - email: christian.staude@staffbase.com + name: cstaud + - email: rootsandtrees@posteo.de + name: zeritti +annotations: + "artifacthub.io/license": Apache-2.0 + "artifacthub.io/links": | + - name: Chart Source + url: https://github.com/prometheus-community/helm-charts diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/README.md b/sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/README.md new file mode 100644 index 0000000..f06d286 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/README.md @@ -0,0 +1,88 @@ +# Prometheus Pushgateway + +This chart bootstraps a prometheus [pushgateway](http://github.com/prometheus/pushgateway) deployment on a [Kubernetes](http://kubernetes.io) cluster using the [Helm](https://helm.sh) package manager. + +An optional prometheus `ServiceMonitor` can be enabled, should you wish to use this gateway with a [Prometheus Operator](https://github.com/coreos/prometheus-operator). 
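+
+As a minimal sketch, assuming the Prometheus Operator CRDs are already installed in the cluster, the `ServiceMonitor` can be switched on through the chart's `serviceMonitor.*` values at install time:
+
+```console
+helm install [RELEASE_NAME] prometheus-community/prometheus-pushgateway \
+  --set serviceMonitor.enabled=true
+```
+
+With the chart defaults the `ServiceMonitor` is created in the `monitoring` namespace; adjust `serviceMonitor.namespace` and `serviceMonitor.additionalLabels` as needed so that your Prometheus instance selects it.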
+ +## Get Repository Info + +```console +helm repo add prometheus-community https://prometheus-community.github.io/helm-charts +helm repo update +``` + +_See [`helm repo`](https://helm.sh/docs/helm/helm_repo/) for command documentation._ + +## Install Chart + +```console +helm install [RELEASE_NAME] prometheus-community/prometheus-pushgateway +``` + +_See [configuration](#configuration) below._ + +_See [helm install](https://helm.sh/docs/helm/helm_install/) for command documentation._ + +## Uninstall Chart + +```console +helm uninstall [RELEASE_NAME] +``` + +This removes all the Kubernetes components associated with the chart and deletes the release. + +_See [helm uninstall](https://helm.sh/docs/helm/helm_uninstall/) for command documentation._ + +## Upgrading Chart + +```console +helm upgrade [RELEASE_NAME] [CHART] --install +``` + +_See [helm upgrade](https://helm.sh/docs/helm/helm_upgrade/) for command documentation._ + +### To 2.0.0 + +Chart API version has been upgraded to v2 so Helm 3 is needed from now on. + +Docker image tag is used from Chart.yaml appVersion field by default now. + +Version 2.0.0 also adapted [Helm label and annotation best practices](https://helm.sh/docs/chart_best_practices/labels/). Specifically, labels mapping is listed below: + +```console +OLD => NEW +---------------------------------------- +heritage => app.kubernetes.io/managed-by +chart => helm.sh/chart +[container version] => app.kubernetes.io/version +app => app.kubernetes.io/name +release => app.kubernetes.io/instance +``` + +Therefore, depending on the way you've configured the chart, the previous StatefulSet or Deployment need to be deleted before upgrade. + +If `runAsStatefulSet: false` (this is the default): + +```console +kubectl delete deploy -l app=prometheus-pushgateway +``` + +If `runAsStatefulSet: true`: + +```console +kubectl delete sts -l app=prometheus-pushgateway +``` + +After that do the actual upgrade: + +```console +helm upgrade -i prometheus-pushgateway prometheus-community/prometheus-pushgateway +``` + +## Configuration + +See [Customizing the Chart Before Installing](https://helm.sh/docs/intro/using_helm/#customizing-the-chart-before-installing). To see all configurable options with detailed comments, visit the chart's [values.yaml](./values.yaml), or run these configuration commands: + +```console +helm show values prometheus-community/prometheus-pushgateway +``` diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/templates/NOTES.txt b/sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/templates/NOTES.txt new file mode 100644 index 0000000..643be97 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/templates/NOTES.txt @@ -0,0 +1,19 @@ +1. Get the application URL by running these commands: +{{- if .Values.ingress.enabled }} +{{- range .Values.ingress.hosts }} + http{{ if $.Values.ingress.tls }}s{{ end }}://{{ . }}{{ $.Values.ingress.path }} +{{- end }} +{{- else if contains "NodePort" .Values.service.type }} + export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ template "prometheus-pushgateway.fullname" . }}) + export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}") + echo http://$NODE_IP:$NODE_PORT +{{- else if contains "LoadBalancer" .Values.service.type }} + NOTE: It may take a few minutes for the LoadBalancer IP to be available. 
+ You can watch the status of by running 'kubectl get svc -w {{ template "prometheus-pushgateway.fullname" . }}' + export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ template "prometheus-pushgateway.fullname" . }} -o jsonpath='{.status.loadBalancer.ingress[0].ip}') + echo http://$SERVICE_IP:{{ .Values.service.port }} +{{- else if contains "ClusterIP" .Values.service.type }} + export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app={{ template "prometheus-pushgateway.name" . }},release={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") + echo "Visit http://127.0.0.1:9091 to use your application" + kubectl port-forward $POD_NAME 9091 +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/templates/_helpers.tpl b/sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/templates/_helpers.tpl new file mode 100644 index 0000000..41c8831 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/templates/_helpers.tpl @@ -0,0 +1,208 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. +*/}} +{{- define "prometheus-pushgateway.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Namespace to set on the resources +*/}} +{{- define "prometheus-pushgateway.namespace" -}} + {{- if .Values.namespaceOverride -}} + {{- .Values.namespaceOverride -}} + {{- else -}} + {{- .Release.Namespace -}} + {{- end -}} +{{- end -}} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "prometheus-pushgateway.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "prometheus-pushgateway.chart" -}} +{{ printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "prometheus-pushgateway.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "prometheus-pushgateway.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} + +{{/* +Create default labels +*/}} +{{- define "prometheus-pushgateway.defaultLabels" -}} +helm.sh/chart: {{ include "prometheus-pushgateway.chart" . }} +{{ include "prometheus-pushgateway.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- with .Values.podLabels }} +{{ toYaml . }} +{{- end }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "prometheus-pushgateway.selectorLabels" -}} +app.kubernetes.io/name: {{ include "prometheus-pushgateway.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Return the appropriate apiVersion for networkpolicy. 
+*/}} +{{- define "prometheus-pushgateway.networkPolicy.apiVersion" -}} +{{- if semverCompare ">=1.4-0, <1.7-0" .Capabilities.KubeVersion.GitVersion }} +{{- print "extensions/v1beta1" }} +{{- else if semverCompare "^1.7-0" .Capabilities.KubeVersion.GitVersion }} +{{- print "networking.k8s.io/v1" }} +{{- end }} +{{- end }} + +{{/* +Define PDB apiVersion +*/}} +{{- define "prometheus-pushgateway.pdb.apiVersion" -}} +{{- if $.Capabilities.APIVersions.Has "policy/v1/PodDisruptionBudget" }} +{{- print "policy/v1" }} +{{- else }} +{{- print "policy/v1beta1" }} +{{- end }} +{{- end }} + +{{/* +Define Ingress apiVersion +*/}} +{{- define "prometheus-pushgateway.ingress.apiVersion" -}} +{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion }} +{{- print "networking.k8s.io/v1" }} +{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion }} +{{- print "networking.k8s.io/v1beta1" }} +{{- else }} +{{- print "extensions/v1beta1" }} +{{- end }} +{{- end }} + +{{/* +Returns pod spec +*/}} +{{- define "prometheus-pushgateway.podSpec" -}} +serviceAccountName: {{ include "prometheus-pushgateway.serviceAccountName" . }} +{{- with .Values.priorityClassName }} +priorityClassName: {{ . | quote }} +{{- end }} +{{- with .Values.imagePullSecrets }} +imagePullSecrets: + {{- toYaml . | nindent 2 }} +{{- end }} +{{- with .Values.extraInitContainers }} +initContainers: + {{- toYaml . | nindent 2 }} +{{- end }} +containers: + {{- with .Values.extraContainers }} + {{- toYaml . | nindent 2 }} + {{- end }} + - name: pushgateway + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- with .Values.extraVars }} + env: + {{- toYaml . | nindent 6 }} + {{- end }} + {{- with .Values.extraArgs }} + args: + {{- toYaml . | nindent 6 }} + {{- end }} + ports: + - name: metrics + containerPort: 9091 + protocol: TCP + {{- if .Values.liveness.enabled }} + livenessProbe: + {{- toYaml .Values.liveness.probe | nindent 6 }} + {{- end }} + {{- if .Values.readiness.enabled }} + readinessProbe: + {{- toYaml .Values.readiness.probe | nindent 6 }} + {{- end }} + {{- with .Values.resources }} + resources: + {{- toYaml . | nindent 6 }} + {{- end }} + {{- with .Values.containerSecurityContext }} + securityContext: + {{- toYaml . | nindent 6 }} + {{- end }} + volumeMounts: + - name: storage-volume + mountPath: "{{ .Values.persistentVolume.mountPath }}" + subPath: "{{ .Values.persistentVolume.subPath }}" + {{- with .Values.extraVolumeMounts }} + {{- toYaml . | nindent 6 }} + {{- end }} +{{- with .Values.nodeSelector }} +nodeSelector: + {{- toYaml . | nindent 2 }} +{{- end }} +{{- with .Values.tolerations }} +tolerations: + {{- toYaml . | nindent 2 }} +{{- end }} +{{- with .Values.affinity }} +affinity: + {{- toYaml . | nindent 2 }} +{{- end }} +{{- with .Values.topologySpreadConstraints }} +topologySpreadConstraints: + {{- toYaml . | nindent 2 }} +{{- end }} +{{- with .Values.securityContext }} +securityContext: + {{- toYaml . | nindent 2 }} +{{- end }} +volumes: + {{- $storageVolumeAsPVCTemplate := and .Values.runAsStatefulSet .Values.persistentVolume.enabled -}} + {{- if not $storageVolumeAsPVCTemplate }} + - name: storage-volume + {{- if .Values.persistentVolume.enabled }} + persistentVolumeClaim: + claimName: {{ if .Values.persistentVolume.existingClaim }}{{ .Values.persistentVolume.existingClaim }}{{- else }}{{ include "prometheus-pushgateway.fullname" . 
}}{{- end }} + {{- else }} + emptyDir: {} + {{- end }} + {{- end }} + {{- if .Values.extraVolumes }} + {{- toYaml .Values.extraVolumes | nindent 2 }} + {{- else if $storageVolumeAsPVCTemplate }} + [] + {{- end }} +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/templates/deployment.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/templates/deployment.yaml new file mode 100644 index 0000000..5da2e45 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/templates/deployment.yaml @@ -0,0 +1,28 @@ +{{- if not .Values.runAsStatefulSet }} +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + {{- include "prometheus-pushgateway.defaultLabels" . | nindent 4 }} + name: {{ include "prometheus-pushgateway.fullname" . }} + namespace: {{ template "prometheus-pushgateway.namespace" . }} +spec: + replicas: {{ .Values.replicaCount }} + {{- with .Values.strategy }} + strategy: + {{- toYaml . | nindent 4 }} + {{- end }} + selector: + matchLabels: + {{- include "prometheus-pushgateway.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "prometheus-pushgateway.defaultLabels" . | nindent 8 }} + spec: + {{- include "prometheus-pushgateway.podSpec" . | nindent 6 }} +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/templates/ingress.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/templates/ingress.yaml new file mode 100644 index 0000000..54ed256 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/templates/ingress.yaml @@ -0,0 +1,50 @@ +{{- if .Values.ingress.enabled }} +{{- $serviceName := include "prometheus-pushgateway.fullname" . }} +{{- $servicePort := .Values.service.port }} +{{- $ingressPath := .Values.ingress.path }} +{{- $ingressClassName := .Values.ingress.className }} +{{- $ingressPathType := .Values.ingress.pathType }} +{{- $extraPaths := .Values.ingress.extraPaths }} +apiVersion: {{ include "prometheus-pushgateway.ingress.apiVersion" . }} +kind: Ingress +metadata: + {{- with .Values.ingress.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} + labels: + {{- include "prometheus-pushgateway.defaultLabels" . | nindent 4 }} + name: {{ include "prometheus-pushgateway.fullname" . }} + namespace: {{ template "prometheus-pushgateway.namespace" . }} +spec: + {{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion }} + ingressClassName: {{ $ingressClassName }} + {{- end }} + rules: + {{- range $host := .Values.ingress.hosts }} + - host: {{ $host }} + http: + paths: + {{- with $extraPaths }} + {{- toYaml . | nindent 10 }} + {{- end }} + - path: {{ $ingressPath }} + {{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion }} + pathType: {{ $ingressPathType }} + {{- end }} + backend: + {{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion }} + service: + name: {{ $serviceName }} + port: + number: {{ $servicePort }} + {{- else }} + serviceName: {{ $serviceName }} + servicePort: {{ $servicePort }} + {{- end }} + {{- end -}} + {{- with .Values.ingress.tls }} + tls: + {{- toYaml . 
| nindent 4 }} + {{- end }} +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/templates/networkpolicy.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/templates/networkpolicy.yaml new file mode 100644 index 0000000..693bbf0 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/templates/networkpolicy.yaml @@ -0,0 +1,26 @@ +{{- if .Values.networkPolicy }} +apiVersion: {{ include "prometheus-pushgateway.networkPolicy.apiVersion" . }} +kind: NetworkPolicy +metadata: + labels: + {{- include "prometheus-pushgateway.defaultLabels" . | nindent 4 }} + {{- if .Values.networkPolicy.customSelectors }} + name: ingress-allow-customselector-{{ template "prometheus-pushgateway.name" . }} + {{- else if .Values.networkPolicy.allowAll }} + name: ingress-allow-all-{{ template "prometheus-pushgateway.name" . }} + {{- else -}} + {{- fail "One of `allowAll` or `customSelectors` must be specified." }} + {{- end }} + namespace: {{ template "prometheus-pushgateway.namespace" . }} +spec: + podSelector: + matchLabels: + {{- include "prometheus-pushgateway.selectorLabels" . | nindent 6 }} + ingress: + - ports: + - port: {{ .Values.service.targetPort }} + {{- with .Values.networkPolicy.customSelectors }} + from: + {{- toYaml . | nindent 8 }} + {{- end }} +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/templates/pdb.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/templates/pdb.yaml new file mode 100644 index 0000000..948f474 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/templates/pdb.yaml @@ -0,0 +1,14 @@ +{{- if .Values.podDisruptionBudget }} +apiVersion: {{ include "prometheus-pushgateway.pdb.apiVersion" . }} +kind: PodDisruptionBudget +metadata: + labels: + {{- include "prometheus-pushgateway.defaultLabels" . | nindent 4 }} + name: {{ include "prometheus-pushgateway.fullname" . }} + namespace: {{ template "prometheus-pushgateway.namespace" . }} +spec: + selector: + matchLabels: + {{- include "prometheus-pushgateway.selectorLabels" . | nindent 6 }} + {{- toYaml .Values.podDisruptionBudget | nindent 2 }} +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/templates/pushgateway-pvc.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/templates/pushgateway-pvc.yaml new file mode 100644 index 0000000..8f9f418 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/templates/pushgateway-pvc.yaml @@ -0,0 +1,29 @@ +{{- if and (not .Values.runAsStatefulSet) .Values.persistentVolume.enabled (not .Values.persistentVolume.existingClaim) }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + {{- with .Values.persistentVolume.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} + labels: + {{- include "prometheus-pushgateway.defaultLabels" . | nindent 4 }} + {{- with .Values.persistentVolumeLabels }} + {{- toYaml . | nindent 4 }} + {{- end }} + name: {{ include "prometheus-pushgateway.fullname" . }} + namespace: {{ template "prometheus-pushgateway.namespace" . 
}} +spec: + accessModes: + {{- toYaml .Values.persistentVolume.accessModes | nindent 4 }} + {{- if .Values.persistentVolume.storageClass }} + {{- if (eq "-" .Values.persistentVolume.storageClass) }} + storageClassName: "" + {{- else }} + storageClassName: "{{ .Values.persistentVolume.storageClass }}" + {{- end }} + {{- end }} + resources: + requests: + storage: "{{ .Values.persistentVolume.size }}" +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/templates/service.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/templates/service.yaml new file mode 100644 index 0000000..afff2a7 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/templates/service.yaml @@ -0,0 +1,41 @@ +{{- $stsNoHeadlessSvcTypes := list "LoadBalancer" "NodePort" -}} +apiVersion: v1 +kind: Service +metadata: + {{- with .Values.serviceAnnotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} + labels: + {{- include "prometheus-pushgateway.defaultLabels" . | nindent 4 }} + {{- with .Values.serviceLabels }} + {{- toYaml . | nindent 4 }} + {{- end }} + name: {{ include "prometheus-pushgateway.fullname" . }} + namespace: {{ template "prometheus-pushgateway.namespace" . }} +spec: + {{- if .Values.service.clusterIP }} + clusterIP: {{ .Values.service.clusterIP }} + {{ else if and .Values.runAsStatefulSet (not (has .Values.service.type $stsNoHeadlessSvcTypes)) }} + clusterIP: None # Headless service + {{- end }} + type: {{ .Values.service.type }} + {{- with .Values.service.loadBalancerIP }} + loadBalancerIP: {{ . }} + {{- end }} + {{- if .Values.service.loadBalancerSourceRanges }} + loadBalancerSourceRanges: + {{- range $cidr := .Values.service.loadBalancerSourceRanges }} + - {{ $cidr }} + {{- end }} + {{- end }} + ports: + - port: {{ .Values.service.port }} + targetPort: {{ .Values.service.targetPort }} + {{- if and (eq .Values.service.type "NodePort") .Values.service.nodePort }} + nodePort: {{ .Values.service.nodePort }} + {{- end }} + protocol: TCP + name: http + selector: + {{- include "prometheus-pushgateway.selectorLabels" . | nindent 4 }} diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/templates/serviceaccount.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/templates/serviceaccount.yaml new file mode 100644 index 0000000..3725646 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/templates/serviceaccount.yaml @@ -0,0 +1,16 @@ +{{- if .Values.serviceAccount.create }} +apiVersion: v1 +kind: ServiceAccount +metadata: + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} + labels: + {{- include "prometheus-pushgateway.defaultLabels" . | nindent 4 }} + {{- with .Values.serviceAccountLabels }} + {{- toYaml . | nindent 4 }} + {{- end }} + name: {{ include "prometheus-pushgateway.serviceAccountName" . }} + namespace: {{ template "prometheus-pushgateway.namespace" . 
}} +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/templates/servicemonitor.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/templates/servicemonitor.yaml new file mode 100644 index 0000000..80b1d38 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/templates/servicemonitor.yaml @@ -0,0 +1,51 @@ +{{- if .Values.serviceMonitor.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + {{- include "prometheus-pushgateway.defaultLabels" . | nindent 4 }} + {{- if .Values.serviceMonitor.additionalLabels }} + {{- toYaml .Values.serviceMonitor.additionalLabels | nindent 4 }} + {{- end }} + name: {{ include "prometheus-pushgateway.fullname" . }} + {{- if .Values.serviceMonitor.namespace }} + namespace: {{ .Values.serviceMonitor.namespace }} + {{- else }} + namespace: {{ template "prometheus-pushgateway.namespace" . }} + {{- end }} +spec: + endpoints: + - port: http + {{- with .Values.serviceMonitor.interval }} + interval: {{ . }} + {{- end }} + {{- with .Values.serviceMonitor.scheme }} + scheme: {{ . }} + {{- end }} + {{- with .Values.serviceMonitor.bearerTokenFile }} + bearerTokenFile: {{ . }} + {{- end }} + {{- with .Values.serviceMonitor.tlsConfig }} + tlsConfig: + {{- toYaml .| nindent 6 }} + {{- end }} + {{- with .Values.serviceMonitor.scrapeTimeout }} + scrapeTimeout: {{ . }} + {{- end }} + path: {{ .Values.serviceMonitor.telemetryPath }} + honorLabels: {{ .Values.serviceMonitor.honorLabels }} + {{- with .Values.serviceMonitor.metricRelabelings }} + metricRelabelings: + {{- tpl (toYaml . | nindent 6) $ }} + {{- end }} + {{- with .Values.serviceMonitor.relabelings }} + relabelings: + {{- toYaml . | nindent 6 }} + {{- end }} + namespaceSelector: + matchNames: + - {{ template "prometheus-pushgateway.namespace" . }} + selector: + matchLabels: + {{- include "prometheus-pushgateway.selectorLabels" . | nindent 6 }} +{{- end -}} diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/templates/statefulset.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/templates/statefulset.yaml new file mode 100644 index 0000000..82cf57e --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/templates/statefulset.yaml @@ -0,0 +1,49 @@ +{{- if .Values.runAsStatefulSet }} +apiVersion: apps/v1 +kind: StatefulSet +metadata: + labels: + {{- include "prometheus-pushgateway.defaultLabels" . | nindent 4 }} + name: {{ include "prometheus-pushgateway.fullname" . }} + namespace: {{ template "prometheus-pushgateway.namespace" . }} +spec: + replicas: {{ .Values.replicaCount }} + serviceName: {{ include "prometheus-pushgateway.fullname" . }} + selector: + matchLabels: + {{- include "prometheus-pushgateway.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "prometheus-pushgateway.defaultLabels" . | nindent 8 }} + spec: + {{- include "prometheus-pushgateway.podSpec" . | nindent 6 }} + {{- if .Values.persistentVolume.enabled }} + volumeClaimTemplates: + - metadata: + {{- with .Values.persistentVolume.annotations }} + annotations: + {{- toYaml . | nindent 10 }} + {{- end }} + labels: + {{- include "prometheus-pushgateway.defaultLabels" . 
| nindent 10 }} + name: storage-volume + spec: + accessModes: + {{ toYaml .Values.persistentVolume.accessModes }} + {{- if .Values.persistentVolume.storageClass }} + {{- if (eq "-" .Values.persistentVolume.storageClass) }} + storageClassName: "" + {{- else }} + storageClassName: "{{ .Values.persistentVolume.storageClass }}" + {{- end }} + {{- end }} + resources: + requests: + storage: "{{ .Values.persistentVolume.size }}" + {{- end }} +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/values.yaml b/sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/values.yaml new file mode 100644 index 0000000..2e4a4b6 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/charts/prometheus-pushgateway/values.yaml @@ -0,0 +1,330 @@ +# Default values for prometheus-pushgateway. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +# Provide a name in place of prometheus-pushgateway for `app:` labels +nameOverride: "" + +# Provide a name to substitute for the full names of resources +fullnameOverride: "" + +# Provide a namespace to substitude for the namespace on resources +namespaceOverride: "" + +image: + repository: quay.io/prometheus/pushgateway + # if not set appVersion field from Chart.yaml is used + tag: "" + pullPolicy: IfNotPresent + +# Optional pod imagePullSecrets +imagePullSecrets: [] + +service: + type: ClusterIP + port: 9091 + targetPort: 9091 + # nodePort: 32100 + + # Optional - Can be used for headless if value is "None" + clusterIP: "" + + loadBalancerIP: "" + loadBalancerSourceRanges: [] + +# Optional pod annotations +podAnnotations: {} + +# Optional pod labels +podLabels: {} + +# Optional service annotations +serviceAnnotations: {} + +# Optional service labels +serviceLabels: {} + +# Optional serviceAccount labels +serviceAccountLabels: {} + +# Optional persistentVolume labels +persistentVolumeLabels: {} + +# Optional additional environment variables +extraVars: [] + +## Additional pushgateway container arguments +## +## example: +## extraArgs: +## - --persistence.file=/data/pushgateway.data +## - --persistence.interval=5m +extraArgs: [] + +## Additional InitContainers to initialize the pod +## +extraInitContainers: [] + +# Optional additional containers (sidecar) +extraContainers: [] + # - name: oAuth2-proxy + # args: + # - -https-address=:9092 + # - -upstream=http://localhost:9091 + # - -skip-auth-regex=^/metrics + # - -openshift-delegate-urls={"/":{"group":"monitoring.coreos.com","resource":"prometheuses","verb":"get"}} + # image: openshift/oauth-proxy:v1.1.0 + # ports: + # - containerPort: 9092 + # name: proxy + # resources: + # limits: + # memory: 16Mi + # requests: + # memory: 4Mi + # cpu: 20m + # volumeMounts: + # - mountPath: /etc/prometheus/secrets/pushgateway-tls + # name: secret-pushgateway-tls + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
+ # limits: + # cpu: 200m + # memory: 50Mi + # requests: + # cpu: 100m + # memory: 30Mi + +liveness: + enabled: true + probe: + httpGet: + path: /-/healthy + port: 9091 + initialDelaySeconds: 10 + timeoutSeconds: 10 + +readiness: + enabled: true + probe: + httpGet: + path: /-/ready + port: 9091 + initialDelaySeconds: 10 + timeoutSeconds: 10 + +serviceAccount: + # Specifies whether a ServiceAccount should be created + create: true + # The name of the ServiceAccount to use. + # If not set and create is true, a name is generated using the fullname template + name: + +## Configure ingress resource that allow you to access the +## pushgateway installation. Set up the URL +## ref: http://kubernetes.io/docs/user-guide/ingress/ +## +ingress: + ## Enable Ingress. + ## + enabled: false + # AWS ALB requires path of /* + className: "" + path: / + pathType: ImplementationSpecific + + ## Extra paths to prepend to every host configuration. This is useful when working with annotation based services. + extraPaths: [] + # - path: /* + # backend: + # serviceName: ssl-redirect + # servicePort: use-annotation + + ## Annotations. + ## + # annotations: + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: 'true' + + ## Hostnames. + ## Must be provided if Ingress is enabled. + ## + # hosts: + # - pushgateway.domain.com + + ## TLS configuration. + ## Secrets must be manually created in the namespace. + ## + # tls: + # - secretName: pushgateway-tls + # hosts: + # - pushgateway.domain.com + +tolerations: [] + # - effect: NoSchedule + # operator: Exists + +## Node labels for pushgateway pod assignment +## Ref: https://kubernetes.io/docs/user-guide/node-selection/ +## +nodeSelector: {} + +replicaCount: 1 + +## When running more than one replica alongside with persistence, different volumes are needed +## per replica, since sharing a `persistence.file` across replicas does not keep metrics synced. +## For this purpose, you can enable the `runAsStatefulSet` to deploy the pushgateway as a +## StatefulSet instead of as a Deployment. +runAsStatefulSet: false + +## Security context to be added to push-gateway pods +## +securityContext: + fsGroup: 65534 + runAsUser: 65534 + runAsNonRoot: true + +## Security context to be added to push-gateway containers +## Having a separate variable as securityContext differs for pods and containers. +containerSecurityContext: {} +# allowPrivilegeEscalation: false +# readOnlyRootFilesystem: true +# runAsUser: 65534 +# runAsNonRoot: true + +## Affinity for pod assignment +## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity +affinity: {} + +## Topology spread constraints for pods +## Ref: https://kubernetes.io/docs/concepts/workloads/pods/pod-topology-spread-constraints/ +topologySpreadConstraints: [] + +# Enable this if you're using https://github.com/coreos/prometheus-operator +serviceMonitor: + enabled: false + namespace: monitoring + + # telemetryPath: HTTP resource path from which to fetch metrics. + # Telemetry path, default /metrics, has to be prefixed accordingly if pushgateway sets a route prefix at start-up. + # + telemetryPath: "/metrics" + + # Fallback to the prometheus default unless specified + # interval: 10s + + ## scheme: HTTP scheme to use for scraping. Can be used with `tlsConfig` for example if using istio mTLS. + # scheme: "" + + ## tlsConfig: TLS configuration to use when scraping the endpoint. For example if using istio mTLS. 
+ ## Of type: https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#tlsconfig + # tlsConfig: {} + + # bearerTokenFile: + # Fallback to the prometheus default unless specified + # scrapeTimeout: 30s + + ## Used to pass Labels that are used by the Prometheus installed in your cluster to select Service Monitors to work with + ## ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#prometheusspec + additionalLabels: {} + + # Retain the job and instance labels of the metrics pushed to the Pushgateway + # [Scraping Pushgateway](https://github.com/prometheus/pushgateway#configure-the-pushgateway-as-a-target-to-scrape) + honorLabels: true + + ## Metric relabel configs to apply to samples before ingestion. + ## [Metric Relabeling](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#metric_relabel_configs) + metricRelabelings: [] + # - action: keep + # regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+' + # sourceLabels: [__name__] + + ## Relabel configs to apply to samples before ingestion. + ## [Relabeling](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config) + relabelings: [] + # - sourceLabels: [__meta_kubernetes_pod_node_name] + # separator: ; + # regex: ^(.*)$ + # targetLabel: nodename + # replacement: $1 + # action: replace + +# The values to set in the PodDisruptionBudget spec (minAvailable/maxUnavailable) +# If not set then a PodDisruptionBudget will not be created +podDisruptionBudget: {} + +priorityClassName: + +# Deployment Strategy type +strategy: + type: Recreate + +persistentVolume: + ## If true, pushgateway will create/use a Persistent Volume Claim + ## If false, use emptyDir + ## + enabled: false + + ## pushgateway data Persistent Volume access modes + ## Must match those of existing PV or dynamic provisioner + ## Ref: http://kubernetes.io/docs/user-guide/persistent-volumes/ + ## + accessModes: + - ReadWriteOnce + + ## pushgateway data Persistent Volume Claim annotations + ## + annotations: {} + + ## pushgateway data Persistent Volume existing claim name + ## Requires pushgateway.persistentVolume.enabled: true + ## If defined, PVC must be created manually before volume will be bound + existingClaim: "" + + ## pushgateway data Persistent Volume mount root path + ## + mountPath: /data + + ## pushgateway data Persistent Volume size + ## + size: 2Gi + + ## pushgateway data Persistent Volume Storage Class + ## If defined, storageClassName: + ## If set to "-", storageClassName: "", which disables dynamic provisioning + ## If undefined (the default) or set to null, no storageClassName spec is + ## set, choosing the default provisioner. (gp2 on AWS, standard on + ## GKE, AWS & OpenStack) + ## + # storageClass: "-" + + ## Subdirectory of pushgateway data Persistent Volume to mount + ## Useful if the volume's root directory is not empty + ## + subPath: "" + +extraVolumes: [] + # - name: extra + # emptyDir: {} +extraVolumeMounts: [] + # - name: extra + # mountPath: /usr/share/extras + # readOnly: true + +# Configuration for clusters with restrictive network policies in place: +# - allowAll allows access to the PushGateway from any namespace +# - customSelector is a list of pod/namespaceSelectors to allow access from +# These options are mutually exclusive and the latter will take precedence. 
+networkPolicy: {} + # allowAll: true + # customSelectors: + # - namespaceSelector: + # matchLabels: + # type: admin + # - podSelector: + # matchLabels: + # app: myapp diff --git a/sregym/observer/prometheus/prometheus/templates/NOTES.txt b/sregym/observer/prometheus/prometheus/templates/NOTES.txt new file mode 100644 index 0000000..7b931ae --- /dev/null +++ b/sregym/observer/prometheus/prometheus/templates/NOTES.txt @@ -0,0 +1,113 @@ +The Prometheus server can be accessed via port {{ .Values.server.service.servicePort }} on the following DNS name from within your cluster: +{{ template "prometheus.server.fullname" . }}.{{ .Release.Namespace }}.svc.cluster.local + +{{ if .Values.server.ingress.enabled -}} +From outside the cluster, the server URL(s) are: +{{- range .Values.server.ingress.hosts }} +http://{{ . }} +{{- end }} +{{- else }} +Get the Prometheus server URL by running these commands in the same shell: +{{- if contains "NodePort" .Values.server.service.type }} + export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ template "prometheus.server.fullname" . }}) + export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}") + echo http://$NODE_IP:$NODE_PORT +{{- else if contains "LoadBalancer" .Values.server.service.type }} + NOTE: It may take a few minutes for the LoadBalancer IP to be available. + You can watch the status of by running 'kubectl get svc --namespace {{ .Release.Namespace }} -w {{ template "prometheus.server.fullname" . }}' + + export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ template "prometheus.server.fullname" . }} -o jsonpath='{.status.loadBalancer.ingress[0].ip}') + echo http://$SERVICE_IP:{{ .Values.server.service.servicePort }} +{{- else if contains "ClusterIP" .Values.server.service.type }} + export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "prometheus.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") + kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 9090 +{{- end }} + + +{{- if .Values.server.persistentVolume.enabled }} +{{- else }} +################################################################################# +###### WARNING: Persistence is disabled!!! You will lose your data when ##### +###### the Server pod is terminated. ##### +################################################################################# +{{- end }} +{{- end }} + +{{ if .Values.alertmanager.enabled }} +The Prometheus alertmanager can be accessed via port {{ .Values.alertmanager.service.port }} on the following DNS name from within your cluster: +{{ template "prometheus.alertmanager.fullname" . }}.{{ .Release.Namespace }}.svc.cluster.local + +{{ if .Values.alertmanager.ingress.enabled -}} +From outside the cluster, the alertmanager URL(s) are: +{{- range .Values.alertmanager.ingress.hosts }} +http://{{ . }} +{{- end }} +{{- else }} +Get the Alertmanager URL by running these commands in the same shell: +{{- if contains "NodePort" .Values.alertmanager.service.type }} + export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ template "prometheus.alertmanager.fullname" . 
}}) + export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}") + echo http://$NODE_IP:$NODE_PORT +{{- else if contains "LoadBalancer" .Values.alertmanager.service.type }} + NOTE: It may take a few minutes for the LoadBalancer IP to be available. + You can watch the status of by running 'kubectl get svc --namespace {{ .Release.Namespace }} -w {{ template "prometheus.alertmanager.fullname" . }}' + + export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ template "prometheus.alertmanager.fullname" . }} -o jsonpath='{.status.loadBalancer.ingress[0].ip}') + echo http://$SERVICE_IP:{{ .Values.alertmanager.service.servicePort }} +{{- else if contains "ClusterIP" .Values.alertmanager.service.type }} + export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "alertmanager.name" .Subcharts.alertmanager }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") + kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 9093 +{{- end }} +{{- end }} + +{{- if .Values.alertmanager.persistence.enabled }} +{{- else }} +################################################################################# +###### WARNING: Persistence is disabled!!! You will lose your data when ##### +###### the AlertManager pod is terminated. ##### +################################################################################# +{{- end }} +{{- end }} + +{{- if (index .Values "prometheus-node-exporter" "enabled") }} +################################################################################# +###### WARNING: Pod Security Policy has been disabled by default since ##### +###### it deprecated after k8s 1.25+. use ##### +###### (index .Values "prometheus-node-exporter" "rbac" ##### +###### . "pspEnabled") with (index .Values ##### +###### "prometheus-node-exporter" "rbac" "pspAnnotations") ##### +###### in case you still need it. ##### +################################################################################# +{{- end }} + +{{ if (index .Values "prometheus-pushgateway" "enabled") }} +The Prometheus PushGateway can be accessed via port {{ index .Values "prometheus-pushgateway" "service" "port" }} on the following DNS name from within your cluster: +{{ include "prometheus-pushgateway.fullname" (index .Subcharts "prometheus-pushgateway") }}.{{ .Release.Namespace }}.svc.cluster.local + +{{ if (index .Values "prometheus-pushgateway" "ingress" "enabled") -}} +From outside the cluster, the pushgateway URL(s) are: +{{- range (index .Values "prometheus-pushgateway" "ingress" "hosts") }} +http://{{ . }} +{{- end }} +{{- else }} +Get the PushGateway URL by running these commands in the same shell: +{{- $pushgateway_svc_type := index .Values "prometheus-pushgateway" "service" "type" -}} +{{- if contains "NodePort" $pushgateway_svc_type }} + export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "prometheus-pushgateway.fullname" (index .Subcharts "prometheus-pushgateway") }}) + export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}") + echo http://$NODE_IP:$NODE_PORT +{{- else if contains "LoadBalancer" $pushgateway_svc_type }} + NOTE: It may take a few minutes for the LoadBalancer IP to be available. 
+ You can watch the status of by running 'kubectl get svc --namespace {{ .Release.Namespace }} -w {{ include "prometheus-pushgateway.fullname" (index .Subcharts "prometheus-pushgateway") }}' + + export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "prometheus-pushgateway.fullname" (index .Subcharts "prometheus-pushgateway") }} -o jsonpath='{.status.loadBalancer.ingress[0].ip}') + echo http://$SERVICE_IP:{{ index .Values "prometheus-pushgateway" "service" "port" }} +{{- else if contains "ClusterIP" $pushgateway_svc_type }} + export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app={{ include "prometheus.name" (index .Subcharts "prometheus-pushgateway") }},component=pushgateway" -o jsonpath="{.items[0].metadata.name}") + kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 9091 +{{- end }} +{{- end }} +{{- end }} + +For more information on running Prometheus, visit: +https://prometheus.io/ diff --git a/sregym/observer/prometheus/prometheus/templates/_helpers.tpl b/sregym/observer/prometheus/prometheus/templates/_helpers.tpl new file mode 100644 index 0000000..b7b76ab --- /dev/null +++ b/sregym/observer/prometheus/prometheus/templates/_helpers.tpl @@ -0,0 +1,234 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. +*/}} +{{- define "prometheus.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "prometheus.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create labels for prometheus +*/}} +{{- define "prometheus.common.matchLabels" -}} +app.kubernetes.io/name: {{ include "prometheus.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end -}} + +{{/* +Create unified labels for prometheus components +*/}} +{{- define "prometheus.common.metaLabels" -}} +app.kubernetes.io/version: {{ .Chart.AppVersion }} +helm.sh/chart: {{ include "prometheus.chart" . }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +app.kubernetes.io/part-of: {{ include "prometheus.name" . }} +{{- with .Values.commonMetaLabels}} +{{ toYaml . }} +{{- end }} +{{- end -}} + +{{- define "prometheus.server.labels" -}} +{{ include "prometheus.server.matchLabels" . }} +{{ include "prometheus.common.metaLabels" . }} +{{- end -}} + +{{- define "prometheus.server.matchLabels" -}} +app.kubernetes.io/component: {{ .Values.server.name }} +{{ include "prometheus.common.matchLabels" . }} +{{- end -}} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +*/}} +{{- define "prometheus.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Create a fully qualified ClusterRole name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 
+*/}} +{{- define "prometheus.clusterRoleName" -}} +{{- if .Values.server.clusterRoleNameOverride -}} +{{ .Values.server.clusterRoleNameOverride | trunc 63 | trimSuffix "-" }} +{{- else -}} +{{ include "prometheus.server.fullname" . }} +{{- end -}} +{{- end -}} + +{{/* +Create a fully qualified alertmanager name for communicating with the user via NOTES.txt +*/}} +{{- define "prometheus.alertmanager.fullname" -}} +{{- template "alertmanager.fullname" .Subcharts.alertmanager -}} +{{- end -}} + +{{/* +Create a fully qualified Prometheus server name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +*/}} +{{- define "prometheus.server.fullname" -}} +{{- if .Values.server.fullnameOverride -}} +{{- .Values.server.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- if contains $name .Release.Name -}} +{{- printf "%s-%s" .Release.Name .Values.server.name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s-%s" .Release.Name $name .Values.server.name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Get KubeVersion removing pre-release information. +*/}} +{{- define "prometheus.kubeVersion" -}} + {{- default .Capabilities.KubeVersion.Version (regexFind "v[0-9]+\\.[0-9]+\\.[0-9]+" .Capabilities.KubeVersion.Version) -}} +{{- end -}} + +{{/* +Return the appropriate apiVersion for deployment. +*/}} +{{- define "prometheus.deployment.apiVersion" -}} +{{- print "apps/v1" -}} +{{- end -}} + +{{/* +Return the appropriate apiVersion for networkpolicy. +*/}} +{{- define "prometheus.networkPolicy.apiVersion" -}} +{{- print "networking.k8s.io/v1" -}} +{{- end -}} + +{{/* +Return the appropriate apiVersion for poddisruptionbudget. +*/}} +{{- define "prometheus.podDisruptionBudget.apiVersion" -}} +{{- if .Capabilities.APIVersions.Has "policy/v1" }} +{{- print "policy/v1" -}} +{{- else -}} +{{- print "policy/v1beta1" -}} +{{- end -}} +{{- end -}} + +{{/* +Return the appropriate apiVersion for rbac. +*/}} +{{- define "rbac.apiVersion" -}} +{{- if .Capabilities.APIVersions.Has "rbac.authorization.k8s.io/v1" }} +{{- print "rbac.authorization.k8s.io/v1" -}} +{{- else -}} +{{- print "rbac.authorization.k8s.io/v1beta1" -}} +{{- end -}} +{{- end -}} + +{{/* +Return the appropriate apiVersion for ingress. +*/}} +{{- define "ingress.apiVersion" -}} + {{- if and (.Capabilities.APIVersions.Has "networking.k8s.io/v1") (semverCompare ">= 1.19.x" (include "prometheus.kubeVersion" .)) -}} + {{- print "networking.k8s.io/v1" -}} + {{- else if .Capabilities.APIVersions.Has "networking.k8s.io/v1beta1" -}} + {{- print "networking.k8s.io/v1beta1" -}} + {{- else -}} + {{- print "extensions/v1beta1" -}} + {{- end -}} +{{- end -}} + +{{/* +Return if ingress is stable. +*/}} +{{- define "ingress.isStable" -}} + {{- eq (include "ingress.apiVersion" .) "networking.k8s.io/v1" -}} +{{- end -}} + +{{/* +Return if ingress supports ingressClassName. +*/}} +{{- define "ingress.supportsIngressClassName" -}} + {{- or (eq (include "ingress.isStable" .) "true") (and (eq (include "ingress.apiVersion" .) "networking.k8s.io/v1beta1") (semverCompare ">= 1.18.x" (include "prometheus.kubeVersion" .))) -}} +{{- end -}} + +{{/* +Return if ingress supports pathType. +*/}} +{{- define "ingress.supportsPathType" -}} + {{- or (eq (include "ingress.isStable" .) "true") (and (eq (include "ingress.apiVersion" .) 
"networking.k8s.io/v1beta1") (semverCompare ">= 1.18.x" (include "prometheus.kubeVersion" .))) -}} +{{- end -}} + +{{/* +Create the name of the service account to use for the server component +*/}} +{{- define "prometheus.serviceAccountName.server" -}} +{{- if .Values.serviceAccounts.server.create -}} + {{ default (include "prometheus.server.fullname" .) .Values.serviceAccounts.server.name }} +{{- else -}} + {{ default "default" .Values.serviceAccounts.server.name }} +{{- end -}} +{{- end -}} + +{{/* +Define the prometheus.namespace template if set with forceNamespace or .Release.Namespace is set +*/}} +{{- define "prometheus.namespace" -}} + {{- default .Release.Namespace .Values.forceNamespace -}} +{{- end }} + +{{/* +Define template prometheus.namespaces producing a list of namespaces to monitor +*/}} +{{- define "prometheus.namespaces" -}} +{{- $namespaces := list }} +{{- if and .Values.rbac.create .Values.server.useExistingClusterRoleName }} + {{- if .Values.server.namespaces -}} + {{- range $ns := join "," .Values.server.namespaces | split "," }} + {{- $namespaces = append $namespaces (tpl $ns $) }} + {{- end -}} + {{- end -}} + {{- if .Values.server.releaseNamespace -}} + {{- $namespaces = append $namespaces (include "prometheus.namespace" .) }} + {{- end -}} +{{- end -}} +{{ mustToJson $namespaces }} +{{- end -}} + +{{/* +Define prometheus.server.remoteWrite producing a list of remoteWrite configurations with URL templating +*/}} +{{- define "prometheus.server.remoteWrite" -}} +{{- $remoteWrites := list }} +{{- range $remoteWrite := .Values.server.remoteWrite }} + {{- $remoteWrites = tpl $remoteWrite.url $ | set $remoteWrite "url" | append $remoteWrites }} +{{- end -}} +{{ toYaml $remoteWrites }} +{{- end -}} + +{{/* +Define prometheus.server.remoteRead producing a list of remoteRead configurations with URL templating +*/}} +{{- define "prometheus.server.remoteRead" -}} +{{- $remoteReads := list }} +{{- range $remoteRead := .Values.server.remoteRead }} + {{- $remoteReads = tpl $remoteRead.url $ | set $remoteRead "url" | append $remoteReads }} +{{- end -}} +{{ toYaml $remoteReads }} +{{- end -}} + diff --git a/sregym/observer/prometheus/prometheus/templates/clusterrole.yaml b/sregym/observer/prometheus/prometheus/templates/clusterrole.yaml new file mode 100644 index 0000000..0d7bafc --- /dev/null +++ b/sregym/observer/prometheus/prometheus/templates/clusterrole.yaml @@ -0,0 +1,56 @@ +{{- if and .Values.rbac.create (empty .Values.server.useExistingClusterRoleName) -}} +apiVersion: {{ template "rbac.apiVersion" . }} +kind: ClusterRole +metadata: + labels: + {{- include "prometheus.server.labels" . | nindent 4 }} + name: {{ include "prometheus.clusterRoleName" . }} +rules: +{{- if and .Values.podSecurityPolicy.enabled (.Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy") }} + - apiGroups: + - extensions + resources: + - podsecuritypolicies + verbs: + - use + resourceNames: + - {{ template "prometheus.server.fullname" . 
}} +{{- end }} + - apiGroups: + - "" + resources: + - nodes + - nodes/proxy + - nodes/metrics + - services + - endpoints + - pods + - ingresses + - configmaps + verbs: + - get + - list + - watch + - apiGroups: + - "extensions" + - "networking.k8s.io" + resources: + - ingresses/status + - ingresses + verbs: + - get + - list + - watch + - apiGroups: + - "discovery.k8s.io" + resources: + - endpointslices + verbs: + - get + - list + - watch + - nonResourceURLs: + - "/metrics" + verbs: + - get +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/templates/clusterrolebinding.yaml b/sregym/observer/prometheus/prometheus/templates/clusterrolebinding.yaml new file mode 100644 index 0000000..c563bc5 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/templates/clusterrolebinding.yaml @@ -0,0 +1,16 @@ +{{- if and .Values.rbac.create (empty .Values.server.namespaces) (empty .Values.server.useExistingClusterRoleName) -}} +apiVersion: {{ template "rbac.apiVersion" . }} +kind: ClusterRoleBinding +metadata: + labels: + {{- include "prometheus.server.labels" . | nindent 4 }} + name: {{ include "prometheus.clusterRoleName" . }} +subjects: + - kind: ServiceAccount + name: {{ template "prometheus.serviceAccountName.server" . }} + namespace: {{ include "prometheus.namespace" . }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ include "prometheus.clusterRoleName" . }} +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/templates/cm.yaml b/sregym/observer/prometheus/prometheus/templates/cm.yaml new file mode 100644 index 0000000..34bd00d --- /dev/null +++ b/sregym/observer/prometheus/prometheus/templates/cm.yaml @@ -0,0 +1,99 @@ +{{- if (empty .Values.server.configMapOverrideName) -}} +apiVersion: v1 +kind: ConfigMap +metadata: + labels: + {{- include "prometheus.server.labels" . | nindent 4 }} + {{- with .Values.server.extraConfigmapLabels }} + {{- toYaml . | nindent 4 }} + {{- end }} + name: {{ template "prometheus.server.fullname" . }} + namespace: {{ include "prometheus.namespace" . }} +data: + allow-snippet-annotations: "false" +{{- $root := . 
-}} +{{- range $key, $value := .Values.ruleFiles }} + {{ $key }}: {{- toYaml $value | indent 2 }} +{{- end }} +{{- range $key, $value := .Values.serverFiles }} + {{ $key }}: | +{{- if eq $key "prometheus.yml" }} + global: +{{ $root.Values.server.global | toYaml | trimSuffix "\n" | indent 6 }} +{{- if $root.Values.server.remoteWrite }} + remote_write: +{{- include "prometheus.server.remoteWrite" $root | nindent 4 }} +{{- end }} +{{- if $root.Values.server.remoteRead }} + remote_read: +{{- include "prometheus.server.remoteRead" $root | nindent 4 }} +{{- end }} +{{- if or $root.Values.server.tsdb $root.Values.server.exemplars }} + storage: +{{- if $root.Values.server.tsdb }} + tsdb: +{{ $root.Values.server.tsdb | toYaml | indent 8 }} +{{- end }} +{{- if $root.Values.server.exemplars }} + exemplars: +{{ $root.Values.server.exemplars | toYaml | indent 8 }} +{{- end }} +{{- end }} +{{- if $root.Values.scrapeConfigFiles }} + scrape_config_files: +{{ toYaml $root.Values.scrapeConfigFiles | indent 4 }} +{{- end }} +{{- end }} +{{- if eq $key "alerts" }} +{{- if and (not (empty $value)) (empty $value.groups) }} + groups: +{{- range $ruleKey, $ruleValue := $value }} + - name: {{ $ruleKey -}}.rules + rules: +{{ $ruleValue | toYaml | trimSuffix "\n" | indent 6 }} +{{- end }} +{{- else }} +{{ toYaml $value | indent 4 }} +{{- end }} +{{- else }} +{{ toYaml $value | default "{}" | indent 4 }} +{{- end }} +{{- if eq $key "prometheus.yml" -}} +{{- if $root.Values.extraScrapeConfigs }} +{{ tpl $root.Values.extraScrapeConfigs $root | indent 4 }} +{{- end -}} +{{- if or ($root.Values.alertmanager.enabled) ($root.Values.server.alertmanagers) }} + alerting: +{{- if $root.Values.alertRelabelConfigs }} +{{ $root.Values.alertRelabelConfigs | toYaml | trimSuffix "\n" | indent 6 }} +{{- end }} + alertmanagers: +{{- if $root.Values.server.alertmanagers }} +{{ toYaml $root.Values.server.alertmanagers | indent 8 }} +{{- else }} + - kubernetes_sd_configs: + - role: pod + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + {{- if $root.Values.alertmanager.prefixURL }} + path_prefix: {{ $root.Values.alertmanager.prefixURL }} + {{- end }} + relabel_configs: + - source_labels: [__meta_kubernetes_namespace] + regex: {{ $root.Release.Namespace }} + action: keep + - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_instance] + regex: {{ $root.Release.Name }} + action: keep + - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_name] + regex: {{ default "alertmanager" $root.Values.alertmanager.nameOverride | trunc 63 | trimSuffix "-" }} + action: keep + - source_labels: [__meta_kubernetes_pod_container_port_number] + regex: "9093" + action: keep +{{- end -}} +{{- end -}} +{{- end -}} +{{- end -}} +{{- end -}} diff --git a/sregym/observer/prometheus/prometheus/templates/deploy.yaml b/sregym/observer/prometheus/prometheus/templates/deploy.yaml new file mode 100644 index 0000000..777f539 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/templates/deploy.yaml @@ -0,0 +1,360 @@ +{{- if not .Values.server.statefulSet.enabled -}} +apiVersion: {{ template "prometheus.deployment.apiVersion" . }} +kind: Deployment +metadata: +{{- if .Values.server.deploymentAnnotations }} + annotations: + {{ toYaml .Values.server.deploymentAnnotations | nindent 4 }} +{{- end }} + labels: + {{- include "prometheus.server.labels" . | nindent 4 }} + name: {{ template "prometheus.server.fullname" . 
}} + namespace: {{ include "prometheus.namespace" . }} +spec: + selector: + matchLabels: + {{- include "prometheus.server.matchLabels" . | nindent 6 }} + replicas: {{ .Values.server.replicaCount }} + revisionHistoryLimit: {{ .Values.server.revisionHistoryLimit }} + {{- if .Values.server.strategy }} + strategy: +{{ toYaml .Values.server.strategy | trim | indent 4 }} + {{ if eq .Values.server.strategy.type "Recreate" }}rollingUpdate: null{{ end }} +{{- end }} + template: + metadata: + {{- if .Values.server.podAnnotations }} + annotations: + {{ toYaml .Values.server.podAnnotations | nindent 8 }} + {{- end }} + labels: + {{- include "prometheus.server.labels" . | nindent 8 }} + {{- if .Values.server.podLabels}} + {{ toYaml .Values.server.podLabels | nindent 8 }} + {{- end}} + spec: +{{- if .Values.server.priorityClassName }} + priorityClassName: "{{ .Values.server.priorityClassName }}" +{{- end }} +{{- if .Values.server.schedulerName }} + schedulerName: "{{ .Values.server.schedulerName }}" +{{- end }} +{{- if semverCompare ">=1.13-0" .Capabilities.KubeVersion.GitVersion }} + {{- if or (.Values.server.enableServiceLinks) (eq (.Values.server.enableServiceLinks | toString) "") }} + enableServiceLinks: true + {{- else }} + enableServiceLinks: false + {{- end }} +{{- end }} + serviceAccountName: {{ template "prometheus.serviceAccountName.server" . }} + {{- if .Values.server.extraInitContainers }} + initContainers: +{{ toYaml .Values.server.extraInitContainers | indent 8 }} + {{- end }} + containers: + {{- if .Values.configmapReload.prometheus.enabled }} + - name: {{ template "prometheus.name" . }}-{{ .Values.server.name }}-{{ .Values.configmapReload.prometheus.name }} + {{- if .Values.configmapReload.prometheus.image.digest }} + image: "{{ .Values.configmapReload.prometheus.image.repository }}@{{ .Values.configmapReload.prometheus.image.digest }}" + {{- else }} + image: "{{ .Values.configmapReload.prometheus.image.repository }}:{{ .Values.configmapReload.prometheus.image.tag }}" + {{- end }} + imagePullPolicy: "{{ .Values.configmapReload.prometheus.image.pullPolicy }}" + {{- with .Values.configmapReload.prometheus.containerSecurityContext }} + securityContext: + {{- toYaml . | nindent 12 }} + {{- end }} + args: + - --watched-dir=/etc/config + {{- $default_url := "http://127.0.0.1:9090/-/reload" }} + {{- with .Values.server.prefixURL }} + {{- $default_url = printf "http://127.0.0.1:9090%s/-/reload" . }} + {{- end }} + - --reload-url={{ default $default_url .Values.configmapReload.reloadUrl }} + {{- range $key, $value := .Values.configmapReload.prometheus.extraArgs }} + - --{{ $key }}={{ $value }} + {{- end }} + {{- range .Values.configmapReload.prometheus.extraVolumeDirs }} + - --watched-dir={{ . }} + {{- end }} + {{- with .Values.configmapReload.env }} + env: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- if .Values.configmapReload.prometheus.containerPort }} + ports: + - containerPort: {{ .Values.configmapReload.prometheus.containerPort }} + {{- end }} + {{- with .Values.configmapReload.prometheus.resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + volumeMounts: + - name: config-volume + mountPath: /etc/config + readOnly: true + {{- range .Values.configmapReload.prometheus.extraConfigmapMounts }} + - name: {{ $.Values.configmapReload.prometheus.name }}-{{ .name }} + mountPath: {{ .mountPath }} + subPath: {{ .subPath }} + readOnly: {{ .readOnly }} + {{- end }} + {{- with .Values.configmapReload.prometheus.extraVolumeMounts }} + {{ toYaml . 
| nindent 12 }} + {{- end }} + {{- end }} + + - name: {{ template "prometheus.name" . }}-{{ .Values.server.name }} + {{- if .Values.server.image.digest }} + image: "{{ .Values.server.image.repository }}@{{ .Values.server.image.digest }}" + {{- else }} + image: "{{ .Values.server.image.repository }}:{{ .Values.server.image.tag | default .Chart.AppVersion}}" + {{- end }} + imagePullPolicy: "{{ .Values.server.image.pullPolicy }}" + {{- with .Values.server.command }} + command: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- if .Values.server.env }} + env: +{{ toYaml .Values.server.env | indent 12}} + {{- end }} + args: + {{- if .Values.server.defaultFlagsOverride }} + {{ toYaml .Values.server.defaultFlagsOverride | nindent 12}} + {{- else }} + {{- if .Values.server.retention }} + - --storage.tsdb.retention.time={{ .Values.server.retention }} + {{- end }} + {{- if .Values.server.retentionSize }} + - --storage.tsdb.retention.size={{ .Values.server.retentionSize }} + {{- end }} + - --config.file={{ .Values.server.configPath }} + {{- if .Values.server.storagePath }} + - --storage.tsdb.path={{ .Values.server.storagePath }} + {{- else }} + - --storage.tsdb.path={{ .Values.server.persistentVolume.mountPath }} + {{- end }} + - --web.console.libraries=/etc/prometheus/console_libraries + - --web.console.templates=/etc/prometheus/consoles + {{- range .Values.server.extraFlags }} + - --{{ . }} + {{- end }} + {{- range $key, $value := .Values.server.extraArgs }} + - --{{ $key }}={{ $value }} + {{- end }} + {{- if .Values.server.prefixURL }} + - --web.route-prefix={{ .Values.server.prefixURL }} + {{- end }} + {{- if .Values.server.baseURL }} + - --web.external-url={{ .Values.server.baseURL }} + {{- end }} + {{- end }} + ports: + - containerPort: 9090 + {{- if .Values.server.portName }} + name: {{ .Values.server.portName }} + {{- end }} + {{- if .Values.server.hostPort }} + hostPort: {{ .Values.server.hostPort }} + {{- end }} + readinessProbe: + {{- if not .Values.server.tcpSocketProbeEnabled }} + httpGet: + path: {{ .Values.server.prefixURL }}/-/ready + port: 9090 + scheme: {{ .Values.server.probeScheme }} + {{- with .Values.server.probeHeaders }} + httpHeaders: +{{- toYaml . | nindent 14 }} + {{- end }} + {{- else }} + tcpSocket: + port: 9090 + {{- end }} + initialDelaySeconds: {{ .Values.server.readinessProbeInitialDelay }} + periodSeconds: {{ .Values.server.readinessProbePeriodSeconds }} + timeoutSeconds: {{ .Values.server.readinessProbeTimeout }} + failureThreshold: {{ .Values.server.readinessProbeFailureThreshold }} + successThreshold: {{ .Values.server.readinessProbeSuccessThreshold }} + livenessProbe: + {{- if not .Values.server.tcpSocketProbeEnabled }} + httpGet: + path: {{ .Values.server.prefixURL }}/-/healthy + port: 9090 + scheme: {{ .Values.server.probeScheme }} + {{- with .Values.server.probeHeaders }} + httpHeaders: +{{- toYaml . 
| nindent 14 }} + {{- end }} + {{- else }} + tcpSocket: + port: 9090 + {{- end }} + initialDelaySeconds: {{ .Values.server.livenessProbeInitialDelay }} + periodSeconds: {{ .Values.server.livenessProbePeriodSeconds }} + timeoutSeconds: {{ .Values.server.livenessProbeTimeout }} + failureThreshold: {{ .Values.server.livenessProbeFailureThreshold }} + successThreshold: {{ .Values.server.livenessProbeSuccessThreshold }} + {{- if .Values.server.startupProbe.enabled }} + startupProbe: + {{- if not .Values.server.tcpSocketProbeEnabled }} + httpGet: + path: {{ .Values.server.prefixURL }}/-/healthy + port: 9090 + scheme: {{ .Values.server.probeScheme }} + {{- if .Values.server.probeHeaders }} + httpHeaders: + {{- range .Values.server.probeHeaders}} + - name: {{ .name }} + value: {{ .value }} + {{- end }} + {{- end }} + {{- else }} + tcpSocket: + port: 9090 + {{- end }} + failureThreshold: {{ .Values.server.startupProbe.failureThreshold }} + periodSeconds: {{ .Values.server.startupProbe.periodSeconds }} + timeoutSeconds: {{ .Values.server.startupProbe.timeoutSeconds }} + {{- end }} + {{- with .Values.server.resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + volumeMounts: + - name: config-volume + mountPath: /etc/config + - name: storage-volume + mountPath: {{ .Values.server.persistentVolume.mountPath }} + subPath: "{{ .Values.server.persistentVolume.subPath }}" + {{- range .Values.server.extraHostPathMounts }} + - name: {{ .name }} + mountPath: {{ .mountPath }} + subPath: {{ .subPath }} + readOnly: {{ .readOnly }} + {{- end }} + {{- range .Values.server.extraConfigmapMounts }} + - name: {{ $.Values.server.name }}-{{ .name }} + mountPath: {{ .mountPath }} + subPath: {{ .subPath }} + readOnly: {{ .readOnly }} + {{- end }} + {{- range .Values.server.extraSecretMounts }} + - name: {{ .name }} + mountPath: {{ .mountPath }} + subPath: {{ .subPath }} + readOnly: {{ .readOnly }} + {{- end }} + {{- if .Values.server.extraVolumeMounts }} + {{ toYaml .Values.server.extraVolumeMounts | nindent 12 }} + {{- end }} + {{- with .Values.server.containerSecurityContext }} + securityContext: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- if .Values.server.sidecarContainers }} + {{- range $name, $spec := .Values.server.sidecarContainers }} + - name: {{ $name }} + {{- if kindIs "string" $spec }} + {{- tpl $spec $ | nindent 10 }} + {{- else }} + {{- toYaml $spec | nindent 10 }} + {{- end }} + {{- end }} + {{- end }} + {{- if .Values.server.hostNetwork }} + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet + {{- else }} + dnsPolicy: {{ .Values.server.dnsPolicy }} + {{- end }} + {{- if .Values.imagePullSecrets }} + imagePullSecrets: +{{ toYaml .Values.imagePullSecrets | indent 8 }} + {{- end }} + {{- if .Values.server.nodeSelector }} + nodeSelector: +{{ toYaml .Values.server.nodeSelector | indent 8 }} + {{- end }} + {{- if .Values.server.hostAliases }} + hostAliases: +{{ toYaml .Values.server.hostAliases | indent 8 }} + {{- end }} + {{- if .Values.server.dnsConfig }} + dnsConfig: +{{ toYaml .Values.server.dnsConfig | indent 8 }} + {{- end }} + {{- with .Values.server.securityContext }} + securityContext: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if .Values.server.tolerations }} + tolerations: +{{ toYaml .Values.server.tolerations | indent 8 }} + {{- end }} + {{- if .Values.server.affinity }} + affinity: +{{ toYaml .Values.server.affinity | indent 8 }} + {{- end }} + {{- with .Values.server.topologySpreadConstraints }} + topologySpreadConstraints: + {{- toYaml . 
| nindent 8 }} + {{- end }} + terminationGracePeriodSeconds: {{ .Values.server.terminationGracePeriodSeconds }} + volumes: + - name: config-volume + {{- if empty .Values.server.configFromSecret }} + configMap: + name: {{ if .Values.server.configMapOverrideName }}{{ .Release.Name }}-{{ .Values.server.configMapOverrideName }}{{- else }}{{ template "prometheus.server.fullname" . }}{{- end }} + {{- else }} + secret: + secretName: {{ .Values.server.configFromSecret }} + {{- end }} + {{- range .Values.server.extraHostPathMounts }} + - name: {{ .name }} + hostPath: + path: {{ .hostPath }} + {{- end }} + {{- range .Values.configmapReload.prometheus.extraConfigmapMounts }} + - name: {{ $.Values.configmapReload.prometheus.name }}-{{ .name }} + configMap: + name: {{ .configMap }} + {{- end }} + {{- range .Values.server.extraConfigmapMounts }} + - name: {{ $.Values.server.name }}-{{ .name }} + configMap: + name: {{ .configMap }} + {{- end }} + {{- range .Values.server.extraSecretMounts }} + - name: {{ .name }} + secret: + secretName: {{ .secretName }} + {{- with .optional }} + optional: {{ . }} + {{- end }} + {{- end }} + {{- range .Values.configmapReload.prometheus.extraConfigmapMounts }} + - name: {{ .name }} + configMap: + name: {{ .configMap }} + {{- with .optional }} + optional: {{ . }} + {{- end }} + {{- end }} +{{- if .Values.server.extraVolumes }} +{{ toYaml .Values.server.extraVolumes | indent 8}} +{{- end }} + - name: storage-volume + {{- if .Values.server.persistentVolume.enabled }} + persistentVolumeClaim: + claimName: {{ if .Values.server.persistentVolume.existingClaim }}{{ .Values.server.persistentVolume.existingClaim }}{{- else }}{{ template "prometheus.server.fullname" . }}{{- end }} + {{- else }} + emptyDir: + {{- if .Values.server.emptyDir.sizeLimit }} + sizeLimit: {{ .Values.server.emptyDir.sizeLimit }} + {{- else }} + {} + {{- end -}} + {{- end -}} +{{- end -}} diff --git a/sregym/observer/prometheus/prometheus/templates/extra-manifests.yaml b/sregym/observer/prometheus/prometheus/templates/extra-manifests.yaml new file mode 100644 index 0000000..1fa3a3c --- /dev/null +++ b/sregym/observer/prometheus/prometheus/templates/extra-manifests.yaml @@ -0,0 +1,4 @@ +{{ range .Values.extraManifests }} +--- +{{ tpl . $ }} +{{ end }} diff --git a/sregym/observer/prometheus/prometheus/templates/headless-svc.yaml b/sregym/observer/prometheus/prometheus/templates/headless-svc.yaml new file mode 100644 index 0000000..653c745 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/templates/headless-svc.yaml @@ -0,0 +1,35 @@ +{{- if .Values.server.statefulSet.enabled -}} +apiVersion: v1 +kind: Service +metadata: +{{- if .Values.server.statefulSet.headless.annotations }} + annotations: +{{ toYaml .Values.server.statefulSet.headless.annotations | indent 4 }} +{{- end }} + labels: + {{- include "prometheus.server.labels" . | nindent 4 }} +{{- if .Values.server.statefulSet.headless.labels }} +{{ toYaml .Values.server.statefulSet.headless.labels | indent 4 }} +{{- end }} + name: {{ template "prometheus.server.fullname" . }}-headless + namespace: {{ include "prometheus.namespace" . 
}} +spec: + clusterIP: None + ports: + - name: http + port: {{ .Values.server.statefulSet.headless.servicePort }} + protocol: TCP + targetPort: 9090 + {{- if .Values.server.statefulSet.headless.gRPC.enabled }} + - name: grpc + port: {{ .Values.server.statefulSet.headless.gRPC.servicePort }} + protocol: TCP + targetPort: 10901 + {{- if .Values.server.statefulSet.headless.gRPC.nodePort }} + nodePort: {{ .Values.server.statefulSet.headless.gRPC.nodePort }} + {{- end }} + {{- end }} + + selector: + {{- include "prometheus.server.matchLabels" . | nindent 4 }} +{{- end -}} diff --git a/sregym/observer/prometheus/prometheus/templates/ingress.yaml b/sregym/observer/prometheus/prometheus/templates/ingress.yaml new file mode 100644 index 0000000..0e022e5 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/templates/ingress.yaml @@ -0,0 +1,57 @@ +{{- if .Values.server.ingress.enabled -}} +{{- $ingressApiIsStable := eq (include "ingress.isStable" .) "true" -}} +{{- $ingressSupportsIngressClassName := eq (include "ingress.supportsIngressClassName" .) "true" -}} +{{- $ingressSupportsPathType := eq (include "ingress.supportsPathType" .) "true" -}} +{{- $releaseName := .Release.Name -}} +{{- $serviceName := include "prometheus.server.fullname" . }} +{{- $servicePort := .Values.server.ingress.servicePort | default .Values.server.service.servicePort -}} +{{- $ingressPath := .Values.server.ingress.path -}} +{{- $ingressPathType := .Values.server.ingress.pathType -}} +{{- $extraPaths := .Values.server.ingress.extraPaths -}} +apiVersion: {{ template "ingress.apiVersion" . }} +kind: Ingress +metadata: +{{- if .Values.server.ingress.annotations }} + annotations: +{{ toYaml .Values.server.ingress.annotations | indent 4 }} +{{- end }} + labels: + {{- include "prometheus.server.labels" . | nindent 4 }} +{{- range $key, $value := .Values.server.ingress.extraLabels }} + {{ $key }}: {{ $value }} +{{- end }} + name: {{ template "prometheus.server.fullname" . }} + namespace: {{ include "prometheus.namespace" . }} +spec: + {{- if and $ingressSupportsIngressClassName .Values.server.ingress.ingressClassName }} + ingressClassName: {{ .Values.server.ingress.ingressClassName }} + {{- end }} + rules: + {{- range .Values.server.ingress.hosts }} + {{- $url := splitList "/" . }} + - host: {{ first $url }} + http: + paths: +{{ if $extraPaths }} +{{ toYaml $extraPaths | indent 10 }} +{{- end }} + - path: {{ $ingressPath }} + {{- if $ingressSupportsPathType }} + pathType: {{ $ingressPathType }} + {{- end }} + backend: + {{- if $ingressApiIsStable }} + service: + name: {{ $serviceName }} + port: + number: {{ $servicePort }} + {{- else }} + serviceName: {{ $serviceName }} + servicePort: {{ $servicePort }} + {{- end }} + {{- end -}} +{{- if .Values.server.ingress.tls }} + tls: +{{ toYaml .Values.server.ingress.tls | indent 4 }} + {{- end -}} +{{- end -}} diff --git a/sregym/observer/prometheus/prometheus/templates/network-policy.yaml b/sregym/observer/prometheus/prometheus/templates/network-policy.yaml new file mode 100644 index 0000000..a99b3df --- /dev/null +++ b/sregym/observer/prometheus/prometheus/templates/network-policy.yaml @@ -0,0 +1,16 @@ +{{- if .Values.networkPolicy.enabled }} +apiVersion: {{ template "prometheus.networkPolicy.apiVersion" . }} +kind: NetworkPolicy +metadata: + name: {{ template "prometheus.server.fullname" . }} + namespace: {{ include "prometheus.namespace" . }} + labels: + {{- include "prometheus.server.labels" . 
| nindent 4 }} +spec: + podSelector: + matchLabels: + {{- include "prometheus.server.matchLabels" . | nindent 6 }} + ingress: + - ports: + - port: 9090 +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/templates/pdb.yaml b/sregym/observer/prometheus/prometheus/templates/pdb.yaml new file mode 100644 index 0000000..72f61fe --- /dev/null +++ b/sregym/observer/prometheus/prometheus/templates/pdb.yaml @@ -0,0 +1,15 @@ +{{- if .Values.server.podDisruptionBudget.enabled }} +{{- $pdbSpec := omit .Values.server.podDisruptionBudget "enabled" }} +apiVersion: {{ template "prometheus.podDisruptionBudget.apiVersion" . }} +kind: PodDisruptionBudget +metadata: + name: {{ template "prometheus.server.fullname" . }} + namespace: {{ include "prometheus.namespace" . }} + labels: + {{- include "prometheus.server.labels" . | nindent 4 }} +spec: + selector: + matchLabels: + {{- include "prometheus.server.matchLabels" . | nindent 6 }} + {{- toYaml $pdbSpec | nindent 2 }} +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/templates/psp.yaml b/sregym/observer/prometheus/prometheus/templates/psp.yaml new file mode 100644 index 0000000..88cc1f0 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/templates/psp.yaml @@ -0,0 +1,53 @@ +{{- if and .Values.rbac.create .Values.podSecurityPolicy.enabled }} +{{- if .Capabilities.APIVersions.Has "policy/v1beta1/PodSecurityPolicy" }} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "prometheus.server.fullname" . }} + labels: + {{- include "prometheus.server.labels" . | nindent 4 }} + {{- with .Values.server.podSecurityPolicy.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + privileged: false + allowPrivilegeEscalation: false + allowedCapabilities: + - 'CHOWN' + volumes: + - 'configMap' + - 'persistentVolumeClaim' + - 'emptyDir' + - 'secret' + - 'hostPath' + allowedHostPaths: + - pathPrefix: /etc + readOnly: true + - pathPrefix: {{ .Values.server.persistentVolume.mountPath }} + {{- range .Values.server.extraHostPathMounts }} + - pathPrefix: {{ .hostPath }} + readOnly: {{ .readOnly }} + {{- end }} + hostNetwork: false + hostPID: false + hostIPC: false + runAsUser: + rule: 'RunAsAny' + seLinux: + rule: 'RunAsAny' + supplementalGroups: + rule: 'MustRunAs' + ranges: + # Forbid adding the root group. + - min: 1 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + # Forbid adding the root group. + - min: 1 + max: 65535 + readOnlyRootFilesystem: false +{{- end }} +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/templates/pvc.yaml b/sregym/observer/prometheus/prometheus/templates/pvc.yaml new file mode 100644 index 0000000..a0f3345 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/templates/pvc.yaml @@ -0,0 +1,43 @@ +{{- if not .Values.server.statefulSet.enabled -}} +{{- if .Values.server.persistentVolume.enabled -}} +{{- if not .Values.server.persistentVolume.existingClaim -}} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + {{- if .Values.server.persistentVolume.annotations }} + annotations: +{{ toYaml .Values.server.persistentVolume.annotations | indent 4 }} + {{- end }} + labels: + {{- include "prometheus.server.labels" . | nindent 4 }} + {{- with .Values.server.persistentVolume.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} + name: {{ template "prometheus.server.fullname" . }} + namespace: {{ include "prometheus.namespace" . 
}} +spec: + accessModes: +{{ toYaml .Values.server.persistentVolume.accessModes | indent 4 }} +{{- if .Values.server.persistentVolume.storageClass }} +{{- if (eq "-" .Values.server.persistentVolume.storageClass) }} + storageClassName: "" +{{- else }} + storageClassName: "{{ .Values.server.persistentVolume.storageClass }}" +{{- end }} +{{- end }} +{{- if .Values.server.persistentVolume.volumeBindingMode }} + volumeBindingMode: "{{ .Values.server.persistentVolume.volumeBindingMode }}" +{{- end }} + resources: + requests: + storage: "{{ .Values.server.persistentVolume.size }}" +{{- if .Values.server.persistentVolume.selector }} + selector: + {{- toYaml .Values.server.persistentVolume.selector | nindent 4 }} +{{- end -}} +{{- if .Values.server.persistentVolume.volumeName }} + volumeName: "{{ .Values.server.persistentVolume.volumeName }}" +{{- end -}} +{{- end -}} +{{- end -}} +{{- end -}} diff --git a/sregym/observer/prometheus/prometheus/templates/rolebinding.yaml b/sregym/observer/prometheus/prometheus/templates/rolebinding.yaml new file mode 100644 index 0000000..c80ae11 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/templates/rolebinding.yaml @@ -0,0 +1,18 @@ +{{- range include "prometheus.namespaces" . | fromJsonArray }} +--- +apiVersion: {{ template "rbac.apiVersion" $ }} +kind: RoleBinding +metadata: + labels: + {{- include "prometheus.server.labels" $ | nindent 4 }} + name: {{ template "prometheus.server.fullname" $ }} + namespace: {{ . }} +subjects: + - kind: ServiceAccount + name: {{ template "prometheus.serviceAccountName.server" $ }} + namespace: {{ include "prometheus.namespace" $ }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ $.Values.server.useExistingClusterRoleName }} +{{ end -}} diff --git a/sregym/observer/prometheus/prometheus/templates/service.yaml b/sregym/observer/prometheus/prometheus/templates/service.yaml new file mode 100644 index 0000000..57d4357 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/templates/service.yaml @@ -0,0 +1,63 @@ +{{- if .Values.server.service.enabled -}} +apiVersion: v1 +kind: Service +metadata: +{{- if .Values.server.service.annotations }} + annotations: +{{ toYaml .Values.server.service.annotations | indent 4 }} +{{- end }} + labels: + {{- include "prometheus.server.labels" . | nindent 4 }} +{{- if .Values.server.service.labels }} +{{ toYaml .Values.server.service.labels | indent 4 }} +{{- end }} + name: {{ template "prometheus.server.fullname" . }} + namespace: {{ include "prometheus.namespace" . 
}} +spec: +{{- if .Values.server.service.clusterIP }} + clusterIP: {{ .Values.server.service.clusterIP }} +{{- end }} +{{- if .Values.server.service.externalIPs }} + externalIPs: +{{ toYaml .Values.server.service.externalIPs | indent 4 }} +{{- end }} +{{- if .Values.server.service.loadBalancerIP }} + loadBalancerIP: {{ .Values.server.service.loadBalancerIP }} +{{- end }} +{{- if .Values.server.service.loadBalancerSourceRanges }} + loadBalancerSourceRanges: + {{- range $cidr := .Values.server.service.loadBalancerSourceRanges }} + - {{ $cidr }} + {{- end }} +{{- end }} + ports: + - name: http + port: {{ .Values.server.service.servicePort }} + protocol: TCP + targetPort: 9090 + {{- if .Values.server.service.nodePort }} + nodePort: {{ .Values.server.service.nodePort }} + {{- end }} + {{- if .Values.server.service.gRPC.enabled }} + - name: grpc + port: {{ .Values.server.service.gRPC.servicePort }} + protocol: TCP + targetPort: 10901 + {{- if .Values.server.service.gRPC.nodePort }} + nodePort: {{ .Values.server.service.gRPC.nodePort }} + {{- end }} + {{- end }} +{{- if .Values.server.service.additionalPorts }} +{{ toYaml .Values.server.service.additionalPorts | indent 4 }} +{{- end }} + selector: + {{- if and .Values.server.statefulSet.enabled .Values.server.service.statefulsetReplica.enabled }} + statefulset.kubernetes.io/pod-name: {{ template "prometheus.server.fullname" . }}-{{ .Values.server.service.statefulsetReplica.replica }} + {{- else -}} + {{- include "prometheus.server.matchLabels" . | nindent 4 }} +{{- if .Values.server.service.sessionAffinity }} + sessionAffinity: {{ .Values.server.service.sessionAffinity }} +{{- end }} + {{- end }} + type: "{{ .Values.server.service.type }}" +{{- end -}} diff --git a/sregym/observer/prometheus/prometheus/templates/serviceaccount.yaml b/sregym/observer/prometheus/prometheus/templates/serviceaccount.yaml new file mode 100644 index 0000000..63070c6 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/templates/serviceaccount.yaml @@ -0,0 +1,14 @@ +{{- if .Values.serviceAccounts.server.create }} +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + {{- include "prometheus.server.labels" . | nindent 4 }} + name: {{ template "prometheus.serviceAccountName.server" . }} + namespace: {{ include "prometheus.namespace" . }} + annotations: +{{ toYaml .Values.serviceAccounts.server.annotations | indent 4 }} +{{- if .Values.serviceAccounts.server.automountServiceAccountToken }} +automountServiceAccountToken: {{ .Values.serviceAccounts.server.automountServiceAccountToken }} +{{- end }} +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/templates/sts.yaml b/sregym/observer/prometheus/prometheus/templates/sts.yaml new file mode 100644 index 0000000..a7d6d58 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/templates/sts.yaml @@ -0,0 +1,382 @@ +{{- if .Values.server.statefulSet.enabled -}} +apiVersion: apps/v1 +kind: StatefulSet +metadata: +{{- if .Values.server.statefulSet.annotations }} + annotations: + {{ toYaml .Values.server.statefulSet.annotations | nindent 4 }} +{{- end }} + labels: + {{- include "prometheus.server.labels" . | nindent 4 }} + {{- if .Values.server.statefulSet.labels}} + {{ toYaml .Values.server.statefulSet.labels | nindent 4 }} + {{- end}} + name: {{ template "prometheus.server.fullname" . }} + namespace: {{ include "prometheus.namespace" . }} +spec: + {{- if semverCompare ">= 1.27.x" (include "prometheus.kubeVersion" .) 
}} + persistentVolumeClaimRetentionPolicy: + whenDeleted: {{ ternary "Delete" "Retain" .Values.server.statefulSet.pvcDeleteOnStsDelete }} + whenScaled: {{ ternary "Delete" "Retain" .Values.server.statefulSet.pvcDeleteOnStsScale }} + {{- end }} + serviceName: {{ template "prometheus.server.fullname" . }}-headless + selector: + matchLabels: + {{- include "prometheus.server.matchLabels" . | nindent 6 }} + replicas: {{ .Values.server.replicaCount }} + revisionHistoryLimit: {{ .Values.server.revisionHistoryLimit }} + podManagementPolicy: {{ .Values.server.statefulSet.podManagementPolicy }} + template: + metadata: + {{- if .Values.server.podAnnotations }} + annotations: + {{ toYaml .Values.server.podAnnotations | nindent 8 }} + {{- end }} + labels: + {{- include "prometheus.server.labels" . | nindent 8 }} + {{- if .Values.server.podLabels}} + {{ toYaml .Values.server.podLabels | nindent 8 }} + {{- end}} + spec: +{{- if .Values.server.priorityClassName }} + priorityClassName: "{{ .Values.server.priorityClassName }}" +{{- end }} +{{- if .Values.server.schedulerName }} + schedulerName: "{{ .Values.server.schedulerName }}" +{{- end }} +{{- if semverCompare ">=1.13-0" .Capabilities.KubeVersion.GitVersion }} + {{- if or (.Values.server.enableServiceLinks) (eq (.Values.server.enableServiceLinks | toString) "") }} + enableServiceLinks: true + {{- else }} + enableServiceLinks: false + {{- end }} +{{- end }} + serviceAccountName: {{ template "prometheus.serviceAccountName.server" . }} + {{- if .Values.server.extraInitContainers }} + initContainers: +{{ toYaml .Values.server.extraInitContainers | indent 8 }} + {{- end }} + containers: + {{- if .Values.configmapReload.prometheus.enabled }} + - name: {{ template "prometheus.name" . }}-{{ .Values.server.name }}-{{ .Values.configmapReload.prometheus.name }} + {{- if .Values.configmapReload.prometheus.image.digest }} + image: "{{ .Values.configmapReload.prometheus.image.repository }}@{{ .Values.configmapReload.prometheus.image.digest }}" + {{- else }} + image: "{{ .Values.configmapReload.prometheus.image.repository }}:{{ .Values.configmapReload.prometheus.image.tag }}" + {{- end }} + imagePullPolicy: "{{ .Values.configmapReload.prometheus.image.pullPolicy }}" + {{- with .Values.configmapReload.prometheus.containerSecurityContext }} + securityContext: + {{- toYaml . | nindent 12 }} + {{- end }} + args: + - --watched-dir=/etc/config + {{- $default_url := "http://127.0.0.1:9090/-/reload" }} + {{- with .Values.server.prefixURL }} + {{- $default_url = printf "http://127.0.0.1:9090%s/-/reload" . }} + {{- end }} + - --reload-url={{ default $default_url .Values.configmapReload.reloadUrl }} + {{- range $key, $value := .Values.configmapReload.prometheus.extraArgs }} + - --{{ $key }}={{ $value }} + {{- end }} + {{- range .Values.configmapReload.prometheus.extraVolumeDirs }} + - --watched-dir={{ . }} + {{- end }} + {{- with .Values.configmapReload.env }} + env: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- if .Values.configmapReload.prometheus.containerPort }} + ports: + - containerPort: {{ .Values.configmapReload.prometheus.containerPort }} + {{- end }} + {{- with .Values.configmapReload.prometheus.resources }} + resources: + {{- toYaml . 
| nindent 12 }} + {{- end }} + volumeMounts: + - name: config-volume + mountPath: /etc/config + readOnly: true + {{- range .Values.configmapReload.prometheus.extraConfigmapMounts }} + - name: {{ $.Values.configmapReload.prometheus.name }}-{{ .name }} + mountPath: {{ .mountPath }} + subPath: {{ .subPath }} + readOnly: {{ .readOnly }} + {{- end }} + {{- end }} + + - name: {{ template "prometheus.name" . }}-{{ .Values.server.name }} + {{- if .Values.server.image.digest }} + image: "{{ .Values.server.image.repository }}@{{ .Values.server.image.digest }}" + {{- else }} + image: "{{ .Values.server.image.repository }}:{{ .Values.server.image.tag | default .Chart.AppVersion }}" + {{- end }} + imagePullPolicy: "{{ .Values.server.image.pullPolicy }}" + {{- with .Values.server.command }} + command: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- if .Values.server.env }} + env: +{{ toYaml .Values.server.env | indent 12}} + {{- end }} + args: + {{- if .Values.server.defaultFlagsOverride }} + {{ toYaml .Values.server.defaultFlagsOverride | nindent 12}} + {{- else }} + {{- if .Values.server.prefixURL }} + - --web.route-prefix={{ .Values.server.prefixURL }} + {{- end }} + {{- if .Values.server.retention }} + - --storage.tsdb.retention.time={{ .Values.server.retention }} + {{- end }} + {{- if .Values.server.retentionSize }} + - --storage.tsdb.retention.size={{ .Values.server.retentionSize }} + {{- end }} + - --config.file={{ .Values.server.configPath }} + {{- if .Values.server.storagePath }} + - --storage.tsdb.path={{ .Values.server.storagePath }} + {{- else }} + - --storage.tsdb.path={{ .Values.server.persistentVolume.mountPath }} + {{- end }} + - --web.console.libraries=/etc/prometheus/console_libraries + - --web.console.templates=/etc/prometheus/consoles + {{- range .Values.server.extraFlags }} + - --{{ . }} + {{- end }} + {{- range $key, $value := .Values.server.extraArgs }} + - --{{ $key }}={{ $value }} + {{- end }} + {{- if .Values.server.baseURL }} + - --web.external-url={{ .Values.server.baseURL }} + {{- end }} + {{- end }} + ports: + - containerPort: 9090 + {{- if .Values.server.portName }} + name: {{ .Values.server.portName }} + {{- end }} + {{- if .Values.server.hostPort }} + hostPort: {{ .Values.server.hostPort }} + {{- end }} + readinessProbe: + {{- if not .Values.server.tcpSocketProbeEnabled }} + httpGet: + path: {{ .Values.server.prefixURL }}/-/ready + port: 9090 + scheme: {{ .Values.server.probeScheme }} + {{- with .Values.server.probeHeaders }} + httpHeaders: +{{- toYaml . | nindent 14 }} + {{- end }} + {{- else }} + tcpSocket: + port: 9090 + {{- end }} + initialDelaySeconds: {{ .Values.server.readinessProbeInitialDelay }} + periodSeconds: {{ .Values.server.readinessProbePeriodSeconds }} + timeoutSeconds: {{ .Values.server.readinessProbeTimeout }} + failureThreshold: {{ .Values.server.readinessProbeFailureThreshold }} + successThreshold: {{ .Values.server.readinessProbeSuccessThreshold }} + livenessProbe: + {{- if not .Values.server.tcpSocketProbeEnabled }} + httpGet: + path: {{ .Values.server.prefixURL }}/-/healthy + port: 9090 + scheme: {{ .Values.server.probeScheme }} + {{- with .Values.server.probeHeaders }} + httpHeaders: +{{- toYaml . 
| nindent 14 }} + {{- end }} + {{- else }} + tcpSocket: + port: 9090 + {{- end }} + initialDelaySeconds: {{ .Values.server.livenessProbeInitialDelay }} + periodSeconds: {{ .Values.server.livenessProbePeriodSeconds }} + timeoutSeconds: {{ .Values.server.livenessProbeTimeout }} + failureThreshold: {{ .Values.server.livenessProbeFailureThreshold }} + successThreshold: {{ .Values.server.livenessProbeSuccessThreshold }} + {{- if .Values.server.startupProbe.enabled }} + startupProbe: + {{- if not .Values.server.tcpSocketProbeEnabled }} + httpGet: + path: {{ .Values.server.prefixURL }}/-/healthy + port: 9090 + scheme: {{ .Values.server.probeScheme }} + {{- if .Values.server.probeHeaders }} + httpHeaders: + {{- range .Values.server.probeHeaders}} + - name: {{ .name }} + value: {{ .value }} + {{- end }} + {{- end }} + {{- else }} + tcpSocket: + port: 9090 + {{- end }} + failureThreshold: {{ .Values.server.startupProbe.failureThreshold }} + periodSeconds: {{ .Values.server.startupProbe.periodSeconds }} + timeoutSeconds: {{ .Values.server.startupProbe.timeoutSeconds }} + {{- end }} + {{- with .Values.server.resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + volumeMounts: + - name: config-volume + mountPath: /etc/config + - name: {{ ternary .Values.server.persistentVolume.statefulSetNameOverride "storage-volume" (and .Values.server.persistentVolume.enabled (not (empty .Values.server.persistentVolume.statefulSetNameOverride))) }} + mountPath: {{ .Values.server.persistentVolume.mountPath }} + subPath: "{{ .Values.server.persistentVolume.subPath }}" + {{- range .Values.server.extraHostPathMounts }} + - name: {{ .name }} + mountPath: {{ .mountPath }} + subPath: {{ .subPath }} + readOnly: {{ .readOnly }} + {{- end }} + {{- range .Values.server.extraConfigmapMounts }} + - name: {{ $.Values.server.name }}-{{ .name }} + mountPath: {{ .mountPath }} + subPath: {{ .subPath }} + readOnly: {{ .readOnly }} + {{- end }} + {{- range .Values.server.extraSecretMounts }} + - name: {{ .name }} + mountPath: {{ .mountPath }} + subPath: {{ .subPath }} + readOnly: {{ .readOnly }} + {{- end }} + {{- if .Values.server.extraVolumeMounts }} + {{ toYaml .Values.server.extraVolumeMounts | nindent 12 }} + {{- end }} + {{- with .Values.server.containerSecurityContext }} + securityContext: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- if .Values.server.sidecarContainers }} + {{- range $name, $spec := .Values.server.sidecarContainers }} + - name: {{ $name }} + {{- if kindIs "string" $spec }} + {{- tpl $spec $ | nindent 10 }} + {{- else }} + {{- toYaml $spec | nindent 10 }} + {{- end }} + {{- end }} + {{- end }} + hostNetwork: {{ .Values.server.hostNetwork }} + {{- if .Values.server.dnsPolicy }} + dnsPolicy: {{ .Values.server.dnsPolicy }} + {{- end }} + {{- if .Values.imagePullSecrets }} + imagePullSecrets: +{{ toYaml .Values.imagePullSecrets | indent 8 }} + {{- end }} + {{- if .Values.server.nodeSelector }} + nodeSelector: +{{ toYaml .Values.server.nodeSelector | indent 8 }} + {{- end }} + {{- if .Values.server.hostAliases }} + hostAliases: +{{ toYaml .Values.server.hostAliases | indent 8 }} + {{- end }} + {{- if .Values.server.dnsConfig }} + dnsConfig: +{{ toYaml .Values.server.dnsConfig | indent 8 }} + {{- end }} + {{- with .Values.server.securityContext }} + securityContext: + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- if .Values.server.tolerations }} + tolerations: +{{ toYaml .Values.server.tolerations | indent 8 }} + {{- end }} + {{- if .Values.server.affinity }} + affinity: +{{ toYaml .Values.server.affinity | indent 8 }} + {{- end }} + {{- with .Values.server.topologySpreadConstraints }} + topologySpreadConstraints: + {{- toYaml . | nindent 8 }} + {{- end }} + terminationGracePeriodSeconds: {{ .Values.server.terminationGracePeriodSeconds }} + volumes: + - name: config-volume + {{- if empty .Values.server.configFromSecret }} + configMap: + name: {{ if .Values.server.configMapOverrideName }}{{ .Release.Name }}-{{ .Values.server.configMapOverrideName }}{{- else }}{{ template "prometheus.server.fullname" . }}{{- end }} + {{- else }} + secret: + secretName: {{ .Values.server.configFromSecret }} + {{- end }} + {{- range .Values.server.extraHostPathMounts }} + - name: {{ .name }} + hostPath: + path: {{ .hostPath }} + {{- end }} + {{- range .Values.configmapReload.prometheus.extraConfigmapMounts }} + - name: {{ $.Values.configmapReload.prometheus.name }}-{{ .name }} + configMap: + name: {{ .configMap }} + {{- end }} + {{- range .Values.server.extraConfigmapMounts }} + - name: {{ $.Values.server.name }}-{{ .name }} + configMap: + name: {{ .configMap }} + {{- end }} + {{- range .Values.server.extraSecretMounts }} + - name: {{ .name }} + secret: + secretName: {{ .secretName }} + {{- with .optional }} + optional: {{ . }} + {{- end }} + {{- end }} + {{- range .Values.configmapReload.prometheus.extraConfigmapMounts }} + - name: {{ .name }} + configMap: + name: {{ .configMap }} + {{- with .optional }} + optional: {{ . }} + {{- end }} + {{- end }} +{{- if .Values.server.extraVolumes }} +{{ toYaml .Values.server.extraVolumes | indent 8}} +{{- end }} +{{- if .Values.server.persistentVolume.enabled }} + volumeClaimTemplates: + - metadata: + name: {{ .Values.server.persistentVolume.statefulSetNameOverride | default "storage-volume" }} + {{- if .Values.server.persistentVolume.annotations }} + annotations: +{{ toYaml .Values.server.persistentVolume.annotations | indent 10 }} + {{- end }} + {{- if .Values.server.persistentVolume.labels }} + labels: +{{ toYaml .Values.server.persistentVolume.labels | indent 10 }} + {{- end }} + spec: + accessModes: +{{ toYaml .Values.server.persistentVolume.accessModes | indent 10 }} + resources: + requests: + storage: "{{ .Values.server.persistentVolume.size }}" + {{- if .Values.server.persistentVolume.storageClass }} + {{- if (eq "-" .Values.server.persistentVolume.storageClass) }} + storageClassName: "" + {{- else }} + storageClassName: "{{ .Values.server.persistentVolume.storageClass }}" + {{- end }} + {{- end }} +{{- else }} + - name: storage-volume + emptyDir: + {{- if .Values.server.emptyDir.sizeLimit }} + sizeLimit: {{ .Values.server.emptyDir.sizeLimit }} + {{- else }} + {} + {{- end -}} +{{- end }} +{{- end }} diff --git a/sregym/observer/prometheus/prometheus/templates/vpa.yaml b/sregym/observer/prometheus/prometheus/templates/vpa.yaml new file mode 100644 index 0000000..85dd4a4 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/templates/vpa.yaml @@ -0,0 +1,26 @@ +{{- if .Values.server.verticalAutoscaler.enabled -}} +{{- if .Capabilities.APIVersions.Has "autoscaling.k8s.io/v1/VerticalPodAutoscaler" }} +apiVersion: autoscaling.k8s.io/v1 +{{- else }} +apiVersion: autoscaling.k8s.io/v1beta2 +{{- end }} +kind: VerticalPodAutoscaler +metadata: + name: {{ template "prometheus.server.fullname" . 
}}-vpa + namespace: {{ include "prometheus.namespace" . }} + labels: + {{- include "prometheus.server.labels" . | nindent 4 }} +spec: + targetRef: + apiVersion: "apps/v1" +{{- if .Values.server.statefulSet.enabled }} + kind: StatefulSet +{{- else }} + kind: Deployment +{{- end }} + name: {{ template "prometheus.server.fullname" . }} + updatePolicy: + updateMode: {{ .Values.server.verticalAutoscaler.updateMode | default "Off" | quote }} + resourcePolicy: + containerPolicies: {{ .Values.server.verticalAutoscaler.containerPolicies | default list | toYaml | trim | nindent 4 }} +{{- end -}} diff --git a/sregym/observer/prometheus/prometheus/tmpcharts-91028/alertmanager-1.7.0.tgz b/sregym/observer/prometheus/prometheus/tmpcharts-91028/alertmanager-1.7.0.tgz new file mode 100644 index 0000000000000000000000000000000000000000..6e225fe3b52bffd48c0550381acfbdac2d40e3fc GIT binary patch literal 13358 zcmV+}G||f+iwG0|00000|0w_~VMtOiV@ORlOnEsqVl!4SWK%V1T2nbTPgYhoO;>Dc zVQyr3R8em|NM&qo0PMYcbKAI*I68mpQ{c+}RpMEblI=J-qq;e}K1pWAb@Fh<&Yap! zWit>7Nf?s=2LNqr9N*7=3ojCUQ?l$N=V$(r$Rr-!jYfB)(P+R3F%d%o=ZKA7EFcs9 z62{RFn|#LO@%ZVJC+h$4cwGK}?_h8LhrK6<ub;Q$eiCg3Q6!2)^v{gG@4^oJ)rR@VKhc!fl}@TG>%h(#d5Sj zQS1e>5k7zk{tYpXDVcza{Q*pp{Lit!|I{B3Lc{}x6QN$7Ef6?z_!=xA2_wXTVs;KV zA_7d8Ac(O11(PJgfu;fAX|Mp01F?``pJbo`j@Sia{$N2RdKQfwc1ei=4rKrKBZHT| z#xv!J1(XOxL3uhAg1b|7~*Imt5Si^_YeMSE?-DM z2fz%c`IZ+ZkRufTmtc@^EPR+GSy}z*JdGl_80c57lo@cv6sqS|ZWtA6iDe=i%ExMO zVR75@pZVj#*Pcn==TngX7cfc@-?##3m;d_*d(RF_^8d-+`00cEzmI1N$kFy9NaiV= zBTtLD3E1Yr0>$vTVF~@0{|BdJumxVC8B8Mq^hYqGtT4mLr_1Q`MskYPBL(pgY2MIezvF9etf3YIiwK%h8@pg`Oo3>Z?={{>Bn zn1H>(U<w0!E`~3WIY8{(q)Z#0V0IYg8n_ z90^%xZ#-b=0!sn&t8Vi(j zEGOW2_LhnhhNLbU0I&t#T_DDAh?MF9ss|w899%9aM<6s1WBpkvMvyB)e*k3R2^bCs z19T-AJYsV`0oON!!PXWy3PUVKCX9-D@e~CN2@p$ia!nUZq@K*n>u=x^iv{4SIQLCG z4&syxIc6~=DU6~e2pEDwjh+Hi3V)u!0QnjyUGI%*asobnQixlNLAUNn{^05Z(4C(^ zv6z4n62Zs-V7~Hns=p0~_Ko`fwxV*pnzk8&;X4v7CqS?inK3;{qsSnq;FFNqfl>X; z(kQCD+}g@9wI2X2jA!;8TrP01Q12Krnh;##D3WvxQ4-OmqTis>q8?nB3aZF*ir57X z&`}V`zs~46A{G^1_%u-npC%~4Gb}q>NbY0+0#gn^T@dgEO`}i(V#aX*%rDcjZ0IOA z7}eYgL;0V?r_`J@6dmJI@}jJNW~d=&Y)viMSjv&F${x=Ep+ZebNJ0aPngc9Fy8!Bj zFb8u)5L29&a!lr`VLMcr0Lcs#Nyja4Dmge|m@+JuFCxhKTh$sVuntIQIH;5=v>|0< zm_~>-)-ROMbrTw%q99eRO2hCV1b;kgzx$MBJhJKl}4o(QfdDA=nBS3gtB7s(hmvub$y?wvl+fp zA8C@U4z?TvpM*4EeY;do74)# z2u@LC|Illk7kZ)|pA2j}KaGDnkg`~RII-^}=R_2S|9}xBfvh2AixSeD+R>}__mIr>bXDdFiI!$;VGtm$*rC`tm@yhl-rxuvpJ>^DXVQoR zmrzUg8D&z`1Fp4E00<3{#K#A3j4hFJWUUPaurC3A{??S!LeTs%8EDHX(YzWhO}k5= z1|3A03#n!tb4JpW19X*8X=G9YrZn$Lf~;!F#_enXS^}qWmN8Bcd#~R~7C}T>!{G$XV8l@k&nTSb=uHjffi@%LNHaBN zgKWT3WlGOLr6=FA!O-P{VFx4VzF#6jrOAw?0>?Z3X?IALgpufWZXcYO`EO>MLIy}9ul>%COt z1a58&N?X9RWO1$oCLgE;yg=Z0uh`-5svVdlh=eG!2oMPkhwC0DVCasWwzUj6c?VkL zhi%}PR&~`080M@u)HIjNKyFqJsv(O9OQUQootfl;QC?9_oz_aNbIV8}wT@PUYz!v5 z3ORdy3IfE0EG+dz*;mqlZ`Nk-CAOaBr_RhC4lQ(AM}=nI*Yly7qds^6Nj4~g&h=tZ zX=I6qm%B5z+%Ai=twz$;&y6;S3NypZjhg=aq;+QKy4onKXUjph5YLub|C0D?dNUNz zI7TE?My9lDjg4VlGWjxIu2H2`gyfu2su6MkB}ec8(^QXZu%OmXkOW9MV#a)i5tr7m z9AJ*P6wD;h|4W;Nr9|2*h+JSsiDX3Mm5HzrW*wJ-vYYK=yuk!Zt>nwav^2#+nptBO z6Q^2AG`2!7p{X?ZFqx~?BWe5Y0vP@wc~UG;EIS}bT7!@+cNP9}33Z_t8bQjUC73d( zXfqRtfk!#1ANi)y80TsxNYV+|8;@h_079|yt?ljq?F}|4{+S|PsZp+Qu>S@R2GZC# zMG*=FWm-ZD8X;!ow=`&^X+f}9X1=l$8UjcJ_UsoBKq6V^!crw$&1-`!I2DHql4kQJp`q;8 zuG4JKV44a*D43^^K_ZYc{}M(oaEQVTECbF0vakgl%)yUxBrrqaTELY18){_`VlKso ztW_EWPBk2amOdzHovD(**dZFDK|;eLBJfc*$jWV_981MkFt-v*u3NRwPSAu#biVv2 zTDrQWP|@Ilx#E(k^30OClAOC*s%gV&7tA1zQiiPiWr_s!i{|2Al%N`*&vGy(B?Hr1 z778a@4)8=SRF&8f3`&CVXhlmI%0bF!}hVU`p(G*v5S+2h%k+51+ 
zgZF5bZ*vx53TePc0VM%S1Rq^eb}m;Gd?Xn`{^QwxJPoJkz&RS&kEiO}i~N)0Hj3fZ z={dTbfITzmg-fBHbk;TH%J>7}P+&e&yATublD?(l)UdKCrYM#PlFTHIp$HbQwf&dd z$O#jeDJC&N;V$6%v*UvOf8AGq&uW+roS`?Yt27+$ z8W+5gH5`OX0%II#S;#RZM&YT}Qeq$gk3}xEcjV&LbVSl9+Ldyt+?mp^#;)IzrFW(B zrme=#i3JECkrj{iava5$FgPY-gL zgA_DB=c1|XqcJK0Bn)3ji#3D-YE1{NxppDp)(zF{;7=j>3*sQ6X()f6d#X4e35L)} zPRfxsX-4Y3*_+|sC7LcMJvZ#(bAJMk!qB*%!S5QGH!yjVYHTz&{jQ{JV@vvrZymb6 z1Dl*7x*nsFhlenXF&Xx*1;da{z|ayvS1}cHce{2`PyV()9`|pWpX0>4Kn%~8j_;sf zUF{zj=8|BjDf-HcwqEJ{fdXMhQ*&DcgY(ZY!Jk!>&ZF&X)!IZC8A%=(sWP6VtfvEM zwm!Unee=ljS=~gRM|27!$CFa-=aF2ZWIMc;sUNA`zCviikG_bLNEK%&K-lhpQOKUZ{$ncSU(^Mec#+=bK`FsiW zrZ9KjVImN_ka!cYUaBM z+F+AbqqFU4gSz^5{Do5bIuyV*_nY(1*n4h(fGT5s0ma~el(P|EK!!%8Lgiilb~v5I zbuli*?0Xd)H`y3j@L!RO8mNDM_b!9R?2`WD=)HubyvvwN$?{fXakH_w-PmKpQ5C6f zI%coS8;{wC;^t#6rX{2jhJtHW%I69A;wjO!HY+qSLJb@AV1hapSPQxWU>z`_I1$U2 zm}M@7P4ioc+seFF<}`u0_?f{N$;DXb7LywP;Q~dm&le-9?S~T}oA7w5%umB0g(6;v zA|#NPD2X&|3kN?5e+1Gblvi@F8UL>pykzpH8sfM*3)t|J)lt9gm-u z;y(`$o;-Vq|GbaqTF-YA?m09In#pJ+N8dA?K-qj0GB^|7_}R#m+|?BY77-ejed^|; z6O}KEY$z_jCMrLJW|=5p=u|Fn>|5)p98SQ;Y~OU~#*&(n>b)}l+!w`gx|%QUE64NU zzSd^jP`&t+QAauWBznUMxGqEBzN(|91AJZ}cD}>~NVPb~UeQv7thPkD`BCfI`&>GdZANrl_nt+DC~=FLn%bkxG=RUCElWo zOeoYAa6xS=r!lY69puxhQpYU06*);8a1E~>i=?3 z8fHtlPRa7#E-CdY((LP*)ur9I%*4-%(xyu^=stTBy$ow7c>Tca3vN~}NYd5=t<}_grXN8O=2@0iFYX!a^5`B@$!DTdA znZI5epH4G6y*Ugr2A5rhu|RP<9+h^xI2SZ>Z}XSFLbA?txCX=6Q$^}v6Qx3(^8>m$ z=U|cY*P&3ZHUkOw`$;y2;M`QyB_FHQEugl zu4E5L_BV^`gh-3fD0lzcYTbffQn=^?ATUP5T`t$B}=qa>yOYI`P>LPMa2O1&^ zsu6CJ)V-u>gZ<*5x+=7)riv5J215gVXRunCy%MPTq{9WuRz))=xo8G44f`R|C^W#@ zHWnfmy(eS+PRhMa%2(@k57wQWUu{0t(fQ68U2R9K46cqSy;FGCVrSz5?&9eB%ra`H zUU@7rnUA#BFAL(+A$?`;4b6vo2JUn^4HGB?VhX1Jef;?G$FcX*r~lYl?)+!=`_lOx zI=>gr@7Vdx-S>RwKfjE3wu=-lal%jqUN$oAib#Bf-d{)F|9t*revG}JK7aag`^*3O zzTW|d3j66L3Do|E3KPO)4hkc*=Irk?uWdNd%9ABO#Go6{2X{$mw`4@4FC5%1t|B!ffJ~r%MyaiAg|Xb<#}33>ngaSk3{KURZQ3?|Lrn`Z;v7(%THpWa+Juvms>W z_9&kKt1kZFL-{gs4waYNW{8)45j&;#e~@EIx0RR#R4jC!YqwuIEs{|DhukvS$(aiJL0?z z@k}1+xDx4prI#S0WbWi3h?YP$lS>&us5u&E&y7MocU2UQn$N8SY@Ixr23!MM8;rR& z#tKgz1*34)=fw-?!YNG!Am}oawu-YD0x&e^Dl61bG}3Yp5=+)%vyvlWByIR5m?A{f zzWRMru@uvKr(~Q%Oysf#i+bFFiCmhck^%J@9!<{srb?U&^jY8-WrSHx8lB--JA8<7 zw56i3giBj?bHVo`a9|+t#a$9gckpjODTUZ-r1>AjD4-0)a!LgY9EF-Z{dDxE$;co>6{s%IwslZoUp#l@&Yfjh4xBcVoY$X zf-=UnOVN!77}bxD?xxT2$)rZ3Z$mXd!5AA~w9LYbqjrReMJnzviP_C>; z=V&?6-hoGeFNuIxEAF}F1ix+DRymX(%Mn~H4y{|eh zhYqTH@bccK3zL*^r4D+ncWR=3TH(mY;?8jG=ZN0)>TWIPTRFR9=fpf)AYd5Qlm z?1sT>OQqT!&{Mblu3Cfw2g=i-cLp{0yBz=_d(atfOq7tYk&*4{vSS<$Hm-l6JWh=o1=dM|HFC&D)5(E>SjHF`i1!6jP_9=o6(`%E0%I zrRrqz{3NmaY$hu2%*5h30bv>)CnNEl1mdku{9o=8%pO7`OB3G7%P-%a7FpM2l?}78 zld8NoHElSLgj3fA$0t&Q>8zZcWNh>7KFgpw}Y23CXfCl_T91`wsdth<#d4h^V%@gSy&~C!q@%0@pssXR;f%jJeQYF#sW2sQGTha;D;2S}OA5E21*sgW%@hfM zoYMfSwSYP@Y_l|LobN2+DMxt~xZQzQwQIr8vHPBOHP>4s-Tm3C`u(hO$Ms&N;uRgM z>;NUQf(e2tq}JKgG;bTNmL09)Eg`F5)aNpi`|y<4?p^8a$*&elU7*dK$(;JF-Yz&x zdC}bTxAzBnkHiUe@P8x86Y9`!DcPAsL6z!Cq@W7*Os1g9wk1>m%}EsiSS7InXpKiz zX=B&mP*u;ayN&y3b;kTg6k$^!R9%Ebp+vLnwG_(2BW^K$zile1OMTX|sq}R~f|?tF zvKx%LPd0?frmNw5 zBa+=vuP--==%u;R&d!!1fxe@_H$p?S63^KSq=Buel88{Lo7BLT2lcHfb_<5IKYD0X zbMP-(@(oxvi!@laWwewXIw-$Jrgy_ST!troRjtW8XVTCTrFnyQ)C6${gLS z_Oh3PZ&G_L=qgFMbtoDMb%w?I*yX-ZZc1Eqhk&jU)G6t)Ry?F4jdB3@Bt$JTn~3RD zSL?5Z!^zsCT8m7d1kIrT`nl46uWS`rtrfVSSg-2W2G;@8rr=|c9&`|aW|R3MN5XA$~ymb(=n3oTNL+9nvj(*r*$bDS5yby zC0)*5pWeGfZdbBxNMu?icZgJm`li>D<-Ka2v}O79b`9*MTR%-0g5e8FxL^?X$%K@_#Z^^FX(v+$Ds_V*E4Uc%+@(@O9b2X8~>NQs+7uz0P~8E@>2pxb6I{oi+w5Zs%Vw831b zsK?=+yljn0st(X#GH=hXx1L|Bm#3xELDGX9O4v8|)!&=bhX_p&G;ma96HDjj-dw*g 
zF=wf2{vT;L@4*mKXNOrOFjckg)Ik};s}BTTKpeFSwe_|PVoZ)U(R2?!Yl@ueh?bfn zfUZ!Gn!7{#U21msC)eDIcsqF}FIyu-xS+CA5JCZOvny#G+0y#Mf32S4sM8U?mIqs6 z2j@s-V(D&rC0DfC+Id66HrwXg+saX6vpykuO*F}3w~*;mCquX>U#flkefyp_m8{?E z9q$1BE6>Pg(m}xpu_B}~OaNto(C!_zm8R1?qSKai=VefTQN>F_r*9^Vm0|`=f~ysF zh1}t_)peS>?`UJzUTm@=-r0pHJ@Oyi3FhBznvs;?KhxsD1WU_rF-u1s%QRm01De^T z?}b00_aLv8^t`|NiDB{{MHF`1dv7Sgf-(+tlBqW^0l`9taYz&JF&k5}@2i3N;%fm29*r7c|aH5DrqVub@8a zlOv_g&{Zc9f4h&%Uss7(Z-8*6gOa*syI63p_N*)y?kNMyihlMx&Q-pqec6Mp)quUa zhnq`#9D*rMH!gl=Fh-Y@op(>U+Rc4+xbbA;i6S?n<1KLOg!*kzlW!pYuDrT$B6|g$ zm7UB3{qN1-hll6?(#Op+Hp*1&=&EgPbkRZiXT*{0 zBe0hq3~j9aFay~hRZ-D}sy%d-$hE=ScQ0RkK6&^4?D_WN^K^;=5rH`p!1J-I^8nMF70Zz61OyAK=qh?|(afQ8UJ>#dSdG2?OxF%t<`Fz8+p*n;HEWlMr12 z%MtDoA3L4Fct=fGU(H%QH-|She3D}~k<+_0{E7gLv-*WT(RF;%V_KiQd-?h8(VJH# z8YVPccbY_C2$RH@tucZGQn$LxQqNo4(A9UVRWad0(ok0S-=hdYj(in>dUG>uoRu*W zP;Fz%wjKHof5RLLOXj`(Xa3k9`+F@!wxMH7nle{&NbA#*Y%NO3_PrTpmym^E+YSV1 zhGg=$u`ANQwnF{Ro(B8x^8!T)V%!%=bn8Ak?7t^Z5B5v;-_!jk;|KfiKA!9A(U0H) z#}jpf&J0IL3d85IBulVB6Y%3mNz~DggI8CH3i&9zpVd`o15?fe${kTJ%e=gZTv!AD zg{rH>mYv=11!v$35G*ADczU3I;`lV3&F~c%dc%?>r$KySwtKT48m~GV{0r%}Re2$M zkceqat_q$p?ZClcTiN0CYc#U-3vS}SQ38ED(7n`>fAVaGTx-x_BL*Z92;(WQ1Zzqb_v zljr?MHHR@Hg>Iy*$TXaVmn#C7FigY@4F3=J{tq9P+R~l&=dt>q1S-pNFeYCK|_-XfSyPn7WXL zbAh7R=Zld7ebTJ?4QjsbyrGai|6t+|ZD~(uzBf1@C5c7*=4Mbemdr3HSbXpW{FzdL zN;p?*X%Iqhy3`dd@7RnAbu_c#!u_aLM$W5+Lvm9K8FMqSrmi&;ix{}mtB#AyBEJ6a zwNB*RI|tu3iKk*Q-vsQG6vgAt706CggT%$vkqox!%*)%Wfri=tQWL(0n4RHr&Y;*zp+Oy>Ui-!wb#)lXOh#Ys5r4?z-6YkOBtmlrUBQygJ|5%-Tyj!h}< z{|Y%6>STzci@niFO`L970%xUL<#bkRiUh2nS;6>7<&dCSu}GC$xIZpiTJ$hZ%o3!A zK`XbIW`(*-C=|UqMW$Cpo`Y2R_x;#^4fcPwOYARUeA^X3hyA~|xA&}U{~tcs|M&Bh zm|c&%>&VT8!vtLH4bCwMCwU~$z8f*l{mz!IcTXc&9?DyWj+=4JxEco~^Y zy*45}cwfHXr$PQ(!(;=}zf=D2??0)?|NX-!5Ay#$o_;%d+fsJFHS7Fd(p}Z!7lDeP zbA(RWSMREqM)_K2CaYewHZo>+y)Mc@zayQK0r$1F4o^@pF%8uWX_l%=_h4RUH!x3J z7kfjU0*DRW=$+XC0Kz4++1za^3xKEYw=*?LwVaJ&{dkpyyjv@SP(FD|4~<~{w=Gja zD*Hp_9_YfM0G!4`_oTL=Du2|Fb_n*gJU8|M&42DGo_kR?#Pjy*FcTt$~v4t0`eMeo{pt_xPgbvv)=+H`%ax^2h!`di+N zb$b>##Z)qT6m}6_TSRhaU7H1aq`s4i?Wo4VwI3ThcdPb*z#56ou#{{*`PABq-PJ0{ zI|dafT1%zIjp+r7=9fq!S#`VC z5^}Au?ItS>G+Sbz%^CxJ=Tix&2J2shd95YYN@8h$<|> zi;#ME!6Eg`-wR?;U03vICaFMMHiFH5$jY5mP4Zf?Q?jbR&VIwg^KCs1_J0yiH{<*J zq1*l+Kie}h4Mr)^O3di%F>l{CzMV}6)`k6Vg@&iTJLF3taEPxl`7fA8h#8;w)DvOO)7 zUS&qt1qa>Pu5bydukNUasi5jec9DK(^c<0b8@{dJh8(2r^Z#D_fBVm#jjR5@{fGH~ zAJ0nuzuS1}s*blxKPLWhB^O#BgTbqC^ZmHiN~lQ5QjnYNzOpXNw(rn~wgE3%Vc1u5 za5Yz5)4^42Ft7)~@;0(pW}m97e4|^hShMNEEQjn3C2Aa10S=k+Rjt~Gb)cvQ+EJ*( ziJ)?q>bRVHo-V6`%uQuy49;da(3fr3(X_T;3oPmwtm`JK_^fOx-CI+*L0(osHb>$_ z)o_|A8uzPN^9?fsWVh3EBueqw2@Snl&3K_4jl(L1&6&nwL;S3j|LyvJGXX#y`v2hB z(}QyS&y$0P^Z)nqwAuAvcZuF?$UA#7-PU?dfJbecUwu_z6_>~YI2unWHxs6Cf1(+P z!aH*Yraqjfv(2i4X;WY|%FE9qWr`}PISR)S{a!BN+XZ}sa8DQ&G@#K0oV_?H*juH| z4?F{prAho4{aHT@d5(z6!ILag!;WK@nOC)-XE#7LslUddQOTxG)Rpt=%!Q(^ThD8y zTJ}7z)j@Cz(|v(p80nqqZ&ae(X0AJS=UYn9y>olMt@n7YPFacCCEX6#+X0U=uSAw+ z?gSymck(IS2(NQ1Z#~$iK!OnSM9__aDW8?yRNrY>?fb*a`o3-1?`^gg^?!r?=UA+_ zHUZo1zy1CF!;1fR?;-!w{X8~K`-L-0YGSEMlgJhJFiH3*m-zX8EZ^AKAm>(@Z^zg( zQkEAaSHYX6BazVsmel?gb3xhiHIA_;RjC%J6C0b}tup-ac3qurCC;SQ7uMe?)dp%| zDPCcV>~5QKshZKRv{dj>RCvhIu!NS+cxCyw>*7m-1q#l28k=*N1=2oNXJx^*e`aXs z{{wQQE1jc^IEpajuGi3c=}L;*_T}DySzI5T z)_UG<+aEVKz_DVGRN=L|CT?t1o5_Wlkzc@=^Q(Q4O9k_0($shj(A8Gf6iFHA&N{XX zy1s(aty3!D2IJWz%`a1i$=qD8hspeSPH6V}mAVc}8V62IX`?&W3>Id2^L3V>63$q- zm9HAns1)3*tAruWi;~y{C{`_5k8QiTSoq7)?pU5Wa(a%MX%7H{sPfPq?h z9}_IHbn31}P9`HP;-189$fTyny@lqz288u+#-=A0VivBeI>0soIbdkOwwr8_whO`E@JlDl@`BOJm z(`5yc?9!8FZfk%itJ%Sgt;}Dz#im%dv_q~jEtD_ophhG+!6*ssV(mtWUMAG4)6%^y 
zrHrWY%qlobHiKF|o~VO6-Ls=jQkO4_sbiQ9CaAX2OoK*qzVxb5U92-EncJ^P&6c68 z(J~!cCD$Tf$|)lYdHh;6Or^boLTOVBmRu+AUVc74De;OD1V1xcV*#twpYPGE&Lph~ z^s1?>dC;W6PH0%_y~KX?daib~vZ5<8Q|(OG_xct}nU{B|V5LwtH_@uoch5VgX`QyJ z=w4sx-dgqK6sT&=Q8{zd3TkvSn`s45we#JF;RRv>bL5#!`p-XZKh}J_^J!udEWD`O z?y9yGsr!zzhQ>-Uk9#!Ij(i8=HI-IBWL<8-hGOPnm@Oc*kR(BfJ#RWx>6brPC-&_~ z)=bF;25BRJK(T8Pb=0gyUA1v*@Y^l5yRpn_?Vs8xLEx^`v|%Le(XvaVX79N$K%1WD zh0W#CMtB3*-K^J!M_rfs{`W?@%(u>4MlQk^h#<~S7@eX{)1*+kMe#5ML)^dFV)52? z`>p``Sp3a4ZLKw0e-?R@25{?)F3e!Pfl_Y@yEbXnGf@Ic7RvstmgMO)FK7O!mXhQ5 zjzq>Zw14%KSr%Mlfwa)C@8V&d#ll7w2UWjUozH8{?F4hHWQTLB&8Jmi(7= z;R3BRS%G=m8CRdjFGAX#V5~Yk`hs^Dl}p(7bu0D0%fV(wY#jf(qJ3DR)KwyvK+>uV zP>}^R+q{S%j_sax7UEXk@&~n3(cL=+00sf#{0$9JC3jlcW9)$B)N&`**hQ~axZB%w zOXIEqEbfLl-EsUEs_Y=v7Ss)?t%6F0$LLR$pTDbO!JcqGXvQ24oo35H0Tx~x;BtWon5H;Vcd8^X zIEQn@{lOMETVM`&nka7@;0qK*U>?z_j@-dyz6%(NpuiVM<-v2_LlO?Q06}wo!|-FB z4-JJ{T>n2iK2R%mN>oh=5=amO5hlnV_%BaCp9;#*U<;UgseXHL3PQ~Iz@K9=QvYi3 z2mbUgHd6oE7mN8w{$u~-7i5$-G=;%=ny9<)_~1vMUnYYe{V6;j{OF5V{-zAi2S5IQ zgDvnIWSFKL9KU?U2Y$lnA1Dw5ABPBzba6)i82A@FpdlI!1}VWpn^NDZ#4ODJ_pgp# zzIo-x;b!~jnEy|nJ}J-tgS}@D_TRlcTj0obX$_9Jnspx&Mq?xvDCIt;Af!PWBcctZ zPmdQuBz!U&6$*_)8gT6(!(={E>j+WYRoj^i{tnI-sL&@^KoUlX>)TQ|;L_ZhEiV(}g{f&k3ZXfpX=23@U^Erx zdh>YjcYT#30s&Abf!sz(ka2g?X4rGX-evG)tD%~%uq}()I^FDBRgCZ&^Vz45y1iEoCX*Qt9kPaDHUueQWfM| zb8D$zKs|$_6Zx!UAM}$sgUZ@yLSGd?<$?bA;@6}1XP?X8{JU7;TfysQEWbw$K# zyK;)#IOy7Uf*~}DAts|B4oA(M?D&IMSAs!H5>Fara=EGK!j)@Q->rLrAt(ntIT|A0 z_LZ`bP@$L-3OtSUWvTiCT!!Fz&X@#Uf@avHu{lG`3hs}_d3!6mOd~q⋘%XpVJ+H z37{<0V8rlTU!Mt+gwcdyb-i%-N6LjHmxUBv3DiVUU-8xNb=N+sOA&tum;*vVM2Xa% z#u!JTukK_2SXRAo`fo5`fVPrZc^M+Z8iC|O%^I6T%tsrmD6APr3 zkvCQC*{P4oe8-oQlOv>LU?#{Vhv~f)u30NY0^$gTY6v9Jcfp0a0Y4Llk2v_KPYx)r z=cmX1$oX?ese6_ZS%D)^P#Jby`J^hw_<~-V6A3+))E(P@1^X<(qXL(E4$8Oaqig*? zuGs(2_MScD|Gk%|B(Ns~0F2jn0t8FZx4Le9cpjdI=izyHZuk8E0RRC1|AZk@k literal 0 HcmV?d00001 diff --git a/sregym/observer/prometheus/prometheus/values.schema.json b/sregym/observer/prometheus/prometheus/values.schema.json new file mode 100644 index 0000000..98c7de8 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/values.schema.json @@ -0,0 +1,738 @@ +{ + "$schema": "http://json-schema.org/schema#", + "type": "object", + "properties": { + "alertRelabelConfigs": { + "type": "object" + }, + "alertmanager": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean" + }, + "persistence": { + "type": "object", + "properties": { + "size": { + "type": "string" + } + } + }, + "podSecurityContext": { + "type": "object", + "properties": { + "fsGroup": { + "type": "integer" + }, + "runAsGroup": { + "type": "integer" + }, + "runAsNonRoot": { + "type": "boolean" + }, + "runAsUser": { + "type": "integer" + } + } + } + } + }, + "configmapReload": { + "type": "object", + "properties": { + "env": { + "type": "array" + }, + "prometheus": { + "type": "object", + "properties": { + "containerSecurityContext": { + "type": "object" + }, + "enabled": { + "type": "boolean" + }, + "extraArgs": { + "type": "object" + }, + "extraConfigmapMounts": { + "type": "array" + }, + "extraVolumeDirs": { + "type": "array" + }, + "extraVolumeMounts": { + "type": "array" + }, + "image": { + "type": "object", + "properties": { + "digest": { + "type": "string" + }, + "pullPolicy": { + "type": "string" + }, + "repository": { + "type": "string" + }, + "tag": { + "type": "string" + } + } + }, + "name": { + "type": "string" + }, + "resources": { + "type": "object" + } + } + }, + "reloadUrl": { + "type": "string" + } + } + }, + "extraManifests": { + "type": "array" + }, + 
"extraScrapeConfigs": { + "type": "string" + }, + "forceNamespace": { + "type": "string" + }, + "imagePullSecrets": { + "type": "array" + }, + "kube-state-metrics": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean" + } + } + }, + "networkPolicy": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean" + } + } + }, + "podSecurityPolicy": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean" + } + } + }, + "prometheus-node-exporter": { + "type": "object", + "properties": { + "containerSecurityContext": { + "type": "object", + "properties": { + "allowPrivilegeEscalation": { + "type": "boolean" + } + } + }, + "enabled": { + "type": "boolean" + }, + "rbac": { + "type": "object", + "properties": { + "pspEnabled": { + "type": "boolean" + } + } + } + } + }, + "prometheus-pushgateway": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean" + }, + "serviceAnnotations": { + "type": "object", + "properties": { + "prometheus.io/probe": { + "type": "string" + } + } + } + } + }, + "rbac": { + "type": "object", + "properties": { + "create": { + "type": "boolean" + } + } + }, + "ruleFiles": { + "type": "object" + }, + "server": { + "type": "object", + "properties": { + "affinity": { + "type": "object" + }, + "alertmanagers": { + "type": "array" + }, + "baseURL": { + "type": "string" + }, + "clusterRoleNameOverride": { + "type": "string" + }, + "command": { + "type": "array" + }, + "configMapOverrideName": { + "type": "string" + }, + "configPath": { + "type": "string" + }, + "containerSecurityContext": { + "type": "object" + }, + "defaultFlagsOverride": { + "type": "array" + }, + "deploymentAnnotations": { + "type": "object" + }, + "dnsConfig": { + "type": "object" + }, + "dnsPolicy": { + "type": "string" + }, + "emptyDir": { + "type": "object", + "properties": { + "sizeLimit": { + "type": "string" + } + } + }, + "enableServiceLinks": { + "type": "boolean" + }, + "env": { + "type": "array" + }, + "exemplars": { + "type": "object" + }, + "extraArgs": { + "type": "object" + }, + "extraConfigmapLabels": { + "type": "object" + }, + "extraConfigmapMounts": { + "type": "array" + }, + "extraFlags": { + "type": "array", + "items": { + "type": "string" + } + }, + "extraHostPathMounts": { + "type": "array" + }, + "extraInitContainers": { + "type": "array" + }, + "extraSecretMounts": { + "type": "array" + }, + "extraVolumeMounts": { + "type": "array" + }, + "extraVolumes": { + "type": "array" + }, + "global": { + "type": "object", + "properties": { + "evaluation_interval": { + "type": "string" + }, + "scrape_interval": { + "type": "string" + }, + "scrape_timeout": { + "type": "string" + } + } + }, + "hostAliases": { + "type": "array" + }, + "hostNetwork": { + "type": "boolean" + }, + "image": { + "type": "object", + "properties": { + "digest": { + "type": "string" + }, + "pullPolicy": { + "type": "string" + }, + "repository": { + "type": "string" + }, + "tag": { + "type": "string" + } + } + }, + "ingress": { + "type": "object", + "properties": { + "annotations": { + "type": "object" + }, + "enabled": { + "type": "boolean" + }, + "extraLabels": { + "type": "object" + }, + "extraPaths": { + "type": "array" + }, + "hosts": { + "type": "array" + }, + "path": { + "type": "string" + }, + "pathType": { + "type": "string" + }, + "tls": { + "type": "array" + } + } + }, + "livenessProbeFailureThreshold": { + "type": "integer" + }, + "livenessProbeInitialDelay": { + "type": "integer" + }, + "livenessProbePeriodSeconds": { + "type": "integer" + 
}, + "livenessProbeSuccessThreshold": { + "type": "integer" + }, + "livenessProbeTimeout": { + "type": "integer" + }, + "name": { + "type": "string" + }, + "nodeSelector": { + "type": "object" + }, + "persistentVolume": { + "type": "object", + "properties": { + "accessModes": { + "type": "array", + "items": { + "type": "string" + } + }, + "annotations": { + "type": "object" + }, + "enabled": { + "type": "boolean" + }, + "existingClaim": { + "type": "string" + }, + "labels": { + "type": "object" + }, + "mountPath": { + "type": "string" + }, + "size": { + "type": "string" + }, + "statefulSetNameOverride": { + "type": "string" + }, + "subPath": { + "type": "string" + } + } + }, + "podAnnotations": { + "type": "object" + }, + "podDisruptionBudget": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean" + }, + "maxUnavailable": { + "type": [ + "string", + "integer" + ] + } + } + }, + "podLabels": { + "type": "object" + }, + "podSecurityPolicy": { + "type": "object", + "properties": { + "annotations": { + "type": "object" + } + } + }, + "portName": { + "type": "string" + }, + "prefixURL": { + "type": "string" + }, + "priorityClassName": { + "type": "string" + }, + "probeHeaders": { + "type": "array" + }, + "probeScheme": { + "type": "string" + }, + "readinessProbeFailureThreshold": { + "type": "integer" + }, + "readinessProbeInitialDelay": { + "type": "integer" + }, + "readinessProbePeriodSeconds": { + "type": "integer" + }, + "readinessProbeSuccessThreshold": { + "type": "integer" + }, + "readinessProbeTimeout": { + "type": "integer" + }, + "releaseNamespace": { + "type": "boolean" + }, + "remoteRead": { + "type": "array" + }, + "remoteWrite": { + "type": "array" + }, + "replicaCount": { + "type": "integer" + }, + "resources": { + "type": "object" + }, + "retention": { + "type": "string" + }, + "retentionSize": { + "type": "string" + }, + "revisionHistoryLimit": { + "type": "integer" + }, + "securityContext": { + "type": "object", + "properties": { + "fsGroup": { + "type": "integer" + }, + "runAsGroup": { + "type": "integer" + }, + "runAsNonRoot": { + "type": "boolean" + }, + "runAsUser": { + "type": "integer" + } + } + }, + "service": { + "type": "object", + "properties": { + "additionalPorts": { + "type": "array" + }, + "annotations": { + "type": "object" + }, + "clusterIP": { + "type": "string" + }, + "enabled": { + "type": "boolean" + }, + "externalIPs": { + "type": "array" + }, + "gRPC": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean" + }, + "servicePort": { + "type": "integer" + } + } + }, + "labels": { + "type": "object" + }, + "loadBalancerIP": { + "type": "string" + }, + "loadBalancerSourceRanges": { + "type": "array" + }, + "servicePort": { + "type": "integer" + }, + "sessionAffinity": { + "type": "string" + }, + "statefulsetReplica": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean" + }, + "replica": { + "type": "integer" + } + } + }, + "type": { + "type": "string" + } + } + }, + "sidecarContainers": { + "type": "object" + }, + "sidecarTemplateValues": { + "type": "object" + }, + "startupProbe": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean" + }, + "failureThreshold": { + "type": "integer" + }, + "periodSeconds": { + "type": "integer" + }, + "timeoutSeconds": { + "type": "integer" + } + } + }, + "statefulSet": { + "type": "object", + "properties": { + "annotations": { + "type": "object" + }, + "enabled": { + "type": "boolean" + }, + "headless": { + "type": "object", + "properties": { + 
"annotations": { + "type": "object" + }, + "gRPC": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean" + }, + "servicePort": { + "type": "integer" + } + } + }, + "labels": { + "type": "object" + }, + "servicePort": { + "type": "integer" + } + } + }, + "labels": { + "type": "object" + }, + "podManagementPolicy": { + "type": "string" + }, + "pvcDeleteOnStsDelete": { + "type": "boolean" + }, + "pvcDeleteOnStsScale": { + "type": "boolean" + } + } + }, + "storagePath": { + "type": "string" + }, + "strategy": { + "type": "object", + "properties": { + "type": { + "type": "string" + } + } + }, + "tcpSocketProbeEnabled": { + "type": "boolean" + }, + "terminationGracePeriodSeconds": { + "type": "integer" + }, + "tolerations": { + "type": "array" + }, + "topologySpreadConstraints": { + "type": "array" + }, + "tsdb": { + "type": "object" + }, + "verticalAutoscaler": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean" + } + } + } + } + }, + "scrapeConfigFiles": { + "type": "array" + }, + "serverFiles": { + "type": "object", + "properties": { + "alerting_rules.yml": { + "type": "object" + }, + "alerts": { + "type": "object" + }, + "prometheus.yml": { + "type": "object", + "properties": { + "rule_files": { + "type": "array", + "items": { + "type": "string" + } + }, + "scrape_configs": { + "type": "array", + "items": { + "type": "object", + "properties": { + "job_name": { + "type": "string" + }, + "static_configs": { + "type": "array", + "items": { + "type": "object", + "properties": { + "targets": { + "type": "array", + "items": { + "type": "string" + } + } + } + } + } + } + } + } + } + }, + "recording_rules.yml": { + "type": "object" + }, + "rules": { + "type": "object" + } + } + }, + "serviceAccounts": { + "type": "object", + "properties": { + "server": { + "type": "object", + "properties": { + "annotations": { + "type": "object" + }, + "create": { + "type": "boolean" + }, + "name": { + "type": "string" + }, + "automountServiceAccountToken": { + "type": "boolean" + } + } + } + } + } + } +} diff --git a/sregym/observer/prometheus/prometheus/values.yaml b/sregym/observer/prometheus/prometheus/values.yaml new file mode 100644 index 0000000..1bde292 --- /dev/null +++ b/sregym/observer/prometheus/prometheus/values.yaml @@ -0,0 +1,896 @@ +# yaml-language-server: $schema=values.schema.json +# Default values for prometheus. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +rbac: + create: true + +podSecurityPolicy: + enabled: false + +imagePullSecrets: [] +# - name: "image-pull-secret" + +## Define serviceAccount names for components. Defaults to component's fully qualified name. +## +serviceAccounts: + server: + create: true + name: "" + annotations: {} + # automountServiceAccountToken: + +## Additional labels to attach to all resources +commonMetaLabels: {} + +## Monitors ConfigMap changes and POSTs to a URL +## Ref: https://github.com/prometheus-operator/prometheus-operator/tree/main/cmd/prometheus-config-reloader +## +configmapReload: + ## URL for configmap-reload to use for reloads + ## + reloadUrl: "" + + ## env sets environment variables to pass to the container. Can be set as name/value pairs, + ## read from secrets or configmaps. 
+ env: [] + # - name: SOMEVAR + # value: somevalue + # - name: PASSWORD + # valueFrom: + # secretKeyRef: + # name: mysecret + # key: password + # optional: false + + prometheus: + ## If false, the configmap-reload container will not be deployed + ## + enabled: true + + ## configmap-reload container name + ## + name: configmap-reload + + ## configmap-reload container image + ## + image: + repository: quay.io/prometheus-operator/prometheus-config-reloader + tag: v0.67.0 + # When digest is set to a non-empty value, images will be pulled by digest (regardless of tag value). + digest: "" + pullPolicy: IfNotPresent + + # containerPort: 9533 + + ## Additional configmap-reload container arguments + ## + extraArgs: {} + + ## Additional configmap-reload volume directories + ## + extraVolumeDirs: [] + + ## Additional configmap-reload volume mounts + ## + extraVolumeMounts: [] + + ## Additional configmap-reload mounts + ## + extraConfigmapMounts: [] + # - name: prometheus-alerts + # mountPath: /etc/alerts.d + # subPath: "" + # configMap: prometheus-alerts + # readOnly: true + + ## Security context to be added to configmap-reload container + containerSecurityContext: {} + + ## configmap-reload resource requests and limits + ## Ref: http://kubernetes.io/docs/user-guide/compute-resources/ + ## + resources: {} + +server: + ## Prometheus server container name + ## + name: server + + ## Use a ClusterRole (and ClusterRoleBinding) + ## - If set to false - we define a RoleBinding in the defined namespaces ONLY + ## + ## NB: because we need a Role with nonResourceURL's ("/metrics") - you must get someone with Cluster-admin privileges to define this role for you, before running with this setting enabled. + ## This makes prometheus work - for users who do not have ClusterAdmin privs, but wants prometheus to operate on their own namespaces, instead of clusterwide. + ## + ## You MUST also set namespaces to the ones you have access to and want monitored by Prometheus. + ## + # useExistingClusterRoleName: nameofclusterrole + + ## If set it will override prometheus.server.fullname value for ClusterRole and ClusterRoleBinding + ## + clusterRoleNameOverride: "" + + # Enable only the release namespace for monitoring. By default all namespaces are monitored. + # If releaseNamespace and namespaces are both set a merged list will be monitored. + releaseNamespace: false + + ## namespaces to monitor (instead of monitoring all - clusterwide). Needed if you want to run without Cluster-admin privileges. + # namespaces: + # - yournamespace + + # sidecarContainers - add more containers to prometheus server + # Key/Value where Key is the sidecar `- name: ` + # Example: + # sidecarContainers: + # webserver: + # image: nginx + # OR for adding OAuth authentication to Prometheus + # sidecarContainers: + # oauth-proxy: + # image: quay.io/oauth2-proxy/oauth2-proxy:v7.1.2 + # args: + # - --upstream=http://127.0.0.1:9090 + # - --http-address=0.0.0.0:8081 + # - ... + # ports: + # - containerPort: 8081 + # name: oauth-proxy + # protocol: TCP + # resources: {} + sidecarContainers: {} + + # sidecarTemplateValues - context to be used in template for sidecarContainers + # Example: + # sidecarTemplateValues: *your-custom-globals + # sidecarContainers: + # webserver: |- + # {{ include "webserver-container-template" . }} + # Template for `webserver-container-template` might looks like this: + # image: "{{ .Values.server.sidecarTemplateValues.repository }}:{{ .Values.server.sidecarTemplateValues.tag }}" + # ... 
+ # + sidecarTemplateValues: {} + + ## Prometheus server container image + ## + image: + repository: quay.io/prometheus/prometheus + # if not set appVersion field from Chart.yaml is used + tag: "" + # When digest is set to a non-empty value, images will be pulled by digest (regardless of tag value). + digest: "" + pullPolicy: IfNotPresent + + ## Prometheus server command + ## + command: [] + + ## prometheus server priorityClassName + ## + priorityClassName: "" + + ## EnableServiceLinks indicates whether information about services should be injected + ## into pod's environment variables, matching the syntax of Docker links. + ## WARNING: the field is unsupported and will be skipped in K8s prior to v1.13.0. + ## + enableServiceLinks: true + + ## The URL prefix at which the container can be accessed. Useful in the case the '-web.external-url' includes a slug + ## so that the various internal URLs are still able to access as they are in the default case. + ## (Optional) + prefixURL: "" + + ## External URL which can access prometheus + ## Maybe same with Ingress host name + baseURL: "" + + ## Additional server container environment variables + ## + ## You specify this manually like you would a raw deployment manifest. + ## This means you can bind in environment variables from secrets. + ## + ## e.g. static environment variable: + ## - name: DEMO_GREETING + ## value: "Hello from the environment" + ## + ## e.g. secret environment variable: + ## - name: USERNAME + ## valueFrom: + ## secretKeyRef: + ## name: mysecret + ## key: username + env: [] + + # List of flags to override default parameters, e.g: + # - --enable-feature=agent + # - --storage.agent.retention.max-time=30m + # - --config.file=/etc/config/prometheus.yml + defaultFlagsOverride: [] + + extraFlags: + - web.enable-lifecycle + ## web.enable-admin-api flag controls access to the administrative HTTP API which includes functionality such as + ## deleting time series. This is disabled by default. 
+ # - web.enable-admin-api + ## + ## storage.tsdb.no-lockfile flag controls BD locking + # - storage.tsdb.no-lockfile + ## + ## storage.tsdb.wal-compression flag enables compression of the write-ahead log (WAL) + # - storage.tsdb.wal-compression + + ## Path to a configuration file on prometheus server container FS + configPath: /etc/config/prometheus.yml + + ### The data directory used by prometheus to set --storage.tsdb.path + ### When empty server.persistentVolume.mountPath is used instead + storagePath: "" + + global: + ## How frequently to scrape targets by default + ## + scrape_interval: 1m + ## How long until a scrape request times out + ## + scrape_timeout: 10s + ## How frequently to evaluate rules + ## + evaluation_interval: 1m + ## https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_write + ## + remoteWrite: [] + ## https://prometheus.io/docs/prometheus/latest/configuration/configuration/#remote_read + ## + remoteRead: [] + + ## https://prometheus.io/docs/prometheus/latest/configuration/configuration/#tsdb + ## + tsdb: {} + # out_of_order_time_window: 0s + + ## https://prometheus.io/docs/prometheus/latest/configuration/configuration/#exemplars + ## Must be enabled via --enable-feature=exemplar-storage + ## + exemplars: {} + # max_exemplars: 100000 + + ## Custom HTTP headers for Liveness/Readiness/Startup Probe + ## + ## Useful for providing HTTP Basic Auth to healthchecks + probeHeaders: [] + # - name: "Authorization" + # value: "Bearer ABCDEabcde12345" + + ## Additional Prometheus server container arguments + ## + extraArgs: {} + + ## Additional InitContainers to initialize the pod + ## + extraInitContainers: [] + + ## Additional Prometheus server Volume mounts + ## + extraVolumeMounts: [] + + ## Additional Prometheus server Volumes + ## + extraVolumes: [] + + ## Additional Prometheus server hostPath mounts + ## + extraHostPathMounts: [] + # - name: certs-dir + # mountPath: /etc/kubernetes/certs + # subPath: "" + # hostPath: /etc/kubernetes/certs + # readOnly: true + + extraConfigmapMounts: [] + # - name: certs-configmap + # mountPath: /prometheus + # subPath: "" + # configMap: certs-configmap + # readOnly: true + + ## Additional Prometheus server Secret mounts + # Defines additional mounts with secrets. Secrets must be manually created in the namespace. 
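  # For example (illustrative name and path), such a secret could be created ahead of time with:
  #   kubectl create secret generic prom-secret-files --from-file=./secrets/ --namespace <namespace>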
+ extraSecretMounts: [] + # - name: secret-files + # mountPath: /etc/secrets + # subPath: "" + # secretName: prom-secret-files + # readOnly: true + + ## ConfigMap override where fullname is {{.Release.Name}}-{{.Values.server.configMapOverrideName}} + ## Defining configMapOverrideName will cause templates/server-configmap.yaml + ## to NOT generate a ConfigMap resource + ## + configMapOverrideName: "" + + ## Extra labels for Prometheus server ConfigMap (ConfigMap that holds serverFiles) + extraConfigmapLabels: {} + + ingress: + ## If true, Prometheus server Ingress will be created + ## + enabled: false + + # For Kubernetes >= 1.18 you should specify the ingress-controller via the field ingressClassName + # See https://kubernetes.io/blog/2020/04/02/improvements-to-the-ingress-api-in-kubernetes-1.18/#specifying-the-class-of-an-ingress + # ingressClassName: nginx + + ## Prometheus server Ingress annotations + ## + annotations: {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: 'true' + + ## Prometheus server Ingress additional labels + ## + extraLabels: {} + + ## Redirect ingress to an additional defined port on the service + # servicePort: 8081 + + ## Prometheus server Ingress hostnames with optional path + ## Must be provided if Ingress is enabled + ## + hosts: [] + # - prometheus.domain.com + # - domain.com/prometheus + + path: / + + # pathType is only for k8s >= 1.18 + pathType: Prefix + + ## Extra paths to prepend to every host configuration. This is useful when working with annotation based services. + extraPaths: [] + # - path: /* + # backend: + # serviceName: ssl-redirect + # servicePort: use-annotation + + ## Prometheus server Ingress TLS configuration + ## Secrets must be manually created in the namespace + ## + tls: [] + # - secretName: prometheus-server-tls + # hosts: + # - prometheus.domain.com + + ## Server Deployment Strategy type + strategy: + type: Recreate + + ## hostAliases allows adding entries to /etc/hosts inside the containers + hostAliases: [] + # - ip: "127.0.0.1" + # hostnames: + # - "example.com" + + ## Node tolerations for server scheduling to nodes with taints + ## Ref: https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/ + ## + tolerations: [] + # - key: "key" + # operator: "Equal|Exists" + # value: "value" + # effect: "NoSchedule|PreferNoSchedule|NoExecute(1.6 only)" + + ## Node labels for Prometheus server pod assignment + ## Ref: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/ + ## + nodeSelector: {} + + ## Pod affinity + ## + affinity: {} + + ## Pod topology spread constraints + ## ref. https://kubernetes.io/docs/concepts/scheduling-eviction/topology-spread-constraints/ + topologySpreadConstraints: [] + + ## PodDisruptionBudget settings + ## ref: https://kubernetes.io/docs/concepts/workloads/pods/disruptions/ + ## + podDisruptionBudget: + enabled: false + maxUnavailable: 1 + # minAvailable: 1 + ## unhealthyPodEvictionPolicy is available since 1.27.0 (beta) + ## https://kubernetes.io/docs/tasks/run-application/configure-pdb/#unhealthy-pod-eviction-policy + # unhealthyPodEvictionPolicy: IfHealthyBudget + + ## Use an alternate scheduler, e.g. "stork". 
+ ## ref: https://kubernetes.io/docs/tasks/administer-cluster/configure-multiple-schedulers/ + ## + # schedulerName: + + persistentVolume: + ## If true, Prometheus server will create/use a Persistent Volume Claim + ## If false, use emptyDir + ## + enabled: true + + ## If set it will override the name of the created persistent volume claim + ## generated by the stateful set. + ## + statefulSetNameOverride: "" + + ## Prometheus server data Persistent Volume access modes + ## Must match those of existing PV or dynamic provisioner + ## Ref: http://kubernetes.io/docs/user-guide/persistent-volumes/ + ## + accessModes: + - ReadWriteOnce + + ## Prometheus server data Persistent Volume labels + ## + labels: {} + + ## Prometheus server data Persistent Volume annotations + ## + annotations: {} + + ## Prometheus server data Persistent Volume existing claim name + ## Requires server.persistentVolume.enabled: true + ## If defined, PVC must be created manually before volume will be bound + existingClaim: "" + + ## Prometheus server data Persistent Volume mount root path + ## + mountPath: /data + + ## Prometheus server data Persistent Volume size + ## + size: 8Gi + + ## Prometheus server data Persistent Volume Storage Class + ## If defined, storageClassName: + ## If set to "-", storageClassName: "", which disables dynamic provisioning + ## If undefined (the default) or set to null, no storageClassName spec is + ## set, choosing the default provisioner. (gp2 on AWS, standard on + ## GKE, AWS & OpenStack) + ## yinfang + storageClass: "openebs-hostpath" + + ## Prometheus server data Persistent Volume Binding Mode + ## If defined, volumeBindingMode: + ## If undefined (the default) or set to null, no volumeBindingMode spec is + ## set, choosing the default mode. + ## + # volumeBindingMode: "" + + ## Subdirectory of Prometheus server data Persistent Volume to mount + ## Useful if the volume's root directory is not empty + ## + subPath: "" + + ## Persistent Volume Claim Selector + ## Useful if Persistent Volumes have been provisioned in advance + ## Ref: https://kubernetes.io/docs/concepts/storage/persistent-volumes/#selector + ## + # selector: + # matchLabels: + # release: "stable" + # matchExpressions: + # - { key: environment, operator: In, values: [ dev ] } + + ## Persistent Volume Name + ## Useful if Persistent Volumes have been provisioned in advance and you want to use a specific one + ## yinfang + #volumeName: "prometheus-pv" # Changed in order to use PVC + + emptyDir: + ## Prometheus server emptyDir volume size limit + ## + sizeLimit: "" + + ## Annotations to be added to Prometheus server pods + ## + podAnnotations: {} + # iam.amazonaws.com/role: prometheus + + ## Labels to be added to Prometheus server pods + ## + podLabels: {} + + ## Prometheus AlertManager configuration + ## + alertmanagers: [] + + ## Specify if a Pod Security Policy for node-exporter must be created + ## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/ + ## + podSecurityPolicy: + annotations: {} + ## Specify pod annotations + ## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#apparmor + ## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#seccomp + ## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#sysctl + ## + # seccomp.security.alpha.kubernetes.io/allowedProfileNames: '*' + # seccomp.security.alpha.kubernetes.io/defaultProfileName: 'docker/default' + # apparmor.security.beta.kubernetes.io/defaultProfileName: 'runtime/default' + + ## Use a StatefulSet 
if replicaCount needs to be greater than 1 (see below) + ## + replicaCount: 1 + + ## Number of old history to retain to allow rollback + ## Default Kubernetes value is set to 10 + ## + revisionHistoryLimit: 10 + + ## Annotations to be added to deployment + ## + deploymentAnnotations: {} + + statefulSet: + ## If true, use a statefulset instead of a deployment for pod management. + ## This allows to scale replicas to more than 1 pod + ## + enabled: false + + annotations: {} + labels: {} + podManagementPolicy: OrderedReady + + ## Alertmanager headless service to use for the statefulset + ## + headless: + annotations: {} + labels: {} + servicePort: 80 + ## Enable gRPC port on service to allow auto discovery with thanos-querier + gRPC: + enabled: false + servicePort: 10901 + # nodePort: 10901 + + ## Statefulset's persistent volume claim retention policy + ## pvcDeleteOnStsDelete and pvcDeleteOnStsScale determine whether + ## statefulset's PVCs are deleted (true) or retained (false) on scaling down + ## and deleting statefulset, respectively. Requires 1.27.0+. + ## Ref: https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#persistentvolumeclaim-retention + ## + pvcDeleteOnStsDelete: false + pvcDeleteOnStsScale: false + + ## Prometheus server readiness and liveness probe initial delay and timeout + ## Ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/ + ## + tcpSocketProbeEnabled: false + probeScheme: HTTP + readinessProbeInitialDelay: 30 + readinessProbePeriodSeconds: 5 + readinessProbeTimeout: 4 + readinessProbeFailureThreshold: 3 + readinessProbeSuccessThreshold: 1 + livenessProbeInitialDelay: 30 + livenessProbePeriodSeconds: 15 + livenessProbeTimeout: 10 + livenessProbeFailureThreshold: 3 + livenessProbeSuccessThreshold: 1 + startupProbe: + enabled: false + periodSeconds: 5 + failureThreshold: 30 + timeoutSeconds: 10 + + ## Prometheus server resource requests and limits + ## Ref: http://kubernetes.io/docs/user-guide/compute-resources/ + ## + resources: {} + # limits: + # cpu: 500m + # memory: 512Mi + # requests: + # cpu: 500m + # memory: 512Mi + + # Required for use in managed kubernetes clusters (such as AWS EKS) with custom CNI (such as calico), + # because control-plane managed by AWS cannot communicate with pods' IP CIDR and admission webhooks are not working + ## + hostNetwork: false + + # When hostNetwork is enabled, this will set to ClusterFirstWithHostNet automatically + dnsPolicy: ClusterFirst + + # Use hostPort + # hostPort: 9090 + + # Use portName + portName: "" + + ## Vertical Pod Autoscaler config + ## Ref: https://github.com/kubernetes/autoscaler/tree/master/vertical-pod-autoscaler + verticalAutoscaler: + ## If true a VPA object will be created for the controller (either StatefulSet or Deployemnt, based on above configs) + enabled: false + # updateMode: "Auto" + # containerPolicies: + # - containerName: 'prometheus-server' + + # Custom DNS configuration to be added to prometheus server pods + dnsConfig: {} + # nameservers: + # - 1.2.3.4 + # searches: + # - ns1.svc.cluster-domain.example + # - my.dns.search.suffix + # options: + # - name: ndots + # value: "2" + # - name: edns0 + + ## Security context to be added to server pods + ## + securityContext: + runAsUser: 65534 + runAsNonRoot: true + runAsGroup: 65534 + fsGroup: 65534 + + ## Security context to be added to server container + ## + containerSecurityContext: {} + + service: + ## If false, no Service will be created for the Prometheus server + ## + 
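    ## Note: this instance overrides the upstream default and exposes the server as a
    ## NodePort service on 32000 (see `type` and `nodePort` below), so a quick reachability
    ## check from outside the cluster is, e.g. (node address depends on the environment):
    ##   curl http://<node-address>:32000/-/ready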
enabled: true + + annotations: {} + labels: {} + clusterIP: "" + + ## List of IP addresses at which the Prometheus server service is available + ## Ref: https://kubernetes.io/docs/concepts/services-networking/service/#external-ips + ## + externalIPs: [] + + loadBalancerIP: "" + loadBalancerSourceRanges: [] + servicePort: 80 + sessionAffinity: None + # type: ClusterIP + # yinfang + type: NodePort + nodePort: 32000 + + + ## Enable gRPC port on service to allow auto discovery with thanos-querier + gRPC: + enabled: false + servicePort: 10901 + # nodePort: 10901 + + ## If using a statefulSet (statefulSet.enabled=true), configure the + ## service to connect to a specific replica to have a consistent view + ## of the data. + statefulsetReplica: + enabled: false + replica: 0 + + ## Additional port to define in the Service + additionalPorts: [] + # additionalPorts: + # - name: authenticated + # port: 8081 + # targetPort: 8081 + + ## Prometheus server pod termination grace period + ## + terminationGracePeriodSeconds: 300 + + ## Prometheus data retention period (default if not specified is 15 days) + ## + retention: "15d" + + ## Prometheus' data retention size. Supported units: B, KB, MB, GB, TB, PB, EB. + ## + retentionSize: "" + +## Prometheus server ConfigMap entries for rule files (allow prometheus labels interpolation) +ruleFiles: {} + +## Prometheus server ConfigMap entries for scrape_config_files +## (allows scrape configs defined in additional files) +## +scrapeConfigFiles: [] + +## Prometheus server ConfigMap entries +## +serverFiles: + ## Alerts configuration + ## Ref: https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/ + alerting_rules.yml: {} + # groups: + # - name: Instances + # rules: + # - alert: InstanceDown + # expr: up == 0 + # for: 5m + # labels: + # severity: page + # annotations: + # description: '{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes.' + # summary: 'Instance {{ $labels.instance }} down' + ## DEPRECATED DEFAULT VALUE, unless explicitly naming your files, please use alerting_rules.yml + alerts: {} + + ## Records configuration + ## Ref: https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/ + recording_rules.yml: {} + ## DEPRECATED DEFAULT VALUE, unless explicitly naming your files, please use recording_rules.yml + rules: {} + + prometheus.yml: + rule_files: + - /etc/config/recording_rules.yml + - /etc/config/alerting_rules.yml + ## Below two files are DEPRECATED will be removed from this default values file + - /etc/config/rules + - /etc/config/alerts + + scrape_configs: + - job_name: prometheus + static_configs: + - targets: + - localhost:9090 + + - job_name: 'kubernetes-cadvisor' + + # Default to scraping over https. If required, just disable this or change to + # `http`. + scheme: https + + # This TLS & bearer token file config is used to connect to the actual scrape + # endpoints for cluster components. This is separate to discovery auth + # configuration because discovery & scraping are two separate concerns in + # Prometheus. The discovery auth config is automatic if Prometheus runs inside + # the cluster. Otherwise, more config options have to be provided within the + # . + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + # If your node certificates are self-signed or use a different CA to the + # master CA, then disable certificate verification below. 
Note that + # certificate verification is an integral part of a secure infrastructure + # so this should only be disabled in a controlled environment. You can + # disable certificate verification by uncommenting the line below. + # + insecure_skip_verify: true + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + + kubernetes_sd_configs: + - role: node + + # This configuration will work only on kubelet 1.7.3+ + # As the scrape endpoints for cAdvisor have changed + # if you are using older version you need to change the replacement to + # replacement: /api/v1/nodes/$1:4194/proxy/metrics + # more info here https://github.com/coreos/prometheus-operator/issues/633 + relabel_configs: + - target_label: __address__ + replacement: kubernetes.default.svc:443 + - source_labels: [__meta_kubernetes_node_name] + regex: (.+) + target_label: __metrics_path__ + replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + + - job_name: 'kubernetes-nodes' + kubernetes_sd_configs: + - role: endpoints + relabel_configs: + - source_labels: [__meta_kubernetes_endpoints_name] + regex: 'prometheus-prometheus-node-exporter' + action: keep + +# adds additional scrape configs to prometheus.yml +# must be a string so you have to add a | after extraScrapeConfigs: +# example adds prometheus-blackbox-exporter scrape config +extraScrapeConfigs: "" + # - job_name: 'prometheus-blackbox-exporter' + # metrics_path: /probe + # params: + # module: [http_2xx] + # static_configs: + # - targets: + # - https://example.com + # relabel_configs: + # - source_labels: [__address__] + # target_label: __param_target + # - source_labels: [__param_target] + # target_label: instance + # - target_label: __address__ + # replacement: prometheus-blackbox-exporter:9115 + +# Adds option to add alert_relabel_configs to avoid duplicate alerts in alertmanager +# useful in H/A prometheus with different external labels but the same alerts +alertRelabelConfigs: {} + # alert_relabel_configs: + # - source_labels: [dc] + # regex: (.+)\d+ + # target_label: dc + +networkPolicy: + ## Enable creation of NetworkPolicy resources. 
+ ## + enabled: false + +# Force namespace of namespaced resources +forceNamespace: "" + +# Extra manifests to deploy as an array +extraManifests: [] + # - | + # apiVersion: v1 + # kind: ConfigMap + # metadata: + # labels: + # name: prometheus-extra + # data: + # extra-data: "value" + +# Configuration of subcharts defined in Chart.yaml + +## alertmanager sub-chart configurable values +## Please see https://github.com/prometheus-community/helm-charts/tree/main/charts/alertmanager +## +alertmanager: + ## If false, alertmanager will not be installed + ## + enabled: false + + persistence: + size: 2Gi + + podSecurityContext: + runAsUser: 65534 + runAsNonRoot: true + runAsGroup: 65534 + fsGroup: 65534 + +## kube-state-metrics sub-chart configurable values +## Please see https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-state-metrics +## +kube-state-metrics: + ## If false, kube-state-metrics sub-chart will not be installed + ## + enabled: true + +## prometheus-node-exporter sub-chart configurable values +## Please see https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-node-exporter +## +prometheus-node-exporter: + ## If false, node-exporter will not be installed + ## + enabled: true + + rbac: + pspEnabled: false + + containerSecurityContext: + allowPrivilegeEscalation: false + +## prometheus-pushgateway sub-chart configurable values +## Please see https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-pushgateway +## +prometheus-pushgateway: + ## If false, pushgateway will not be installed + ## + enabled: true + + # Optional service annotations + serviceAnnotations: + prometheus.io/probe: pushgateway + +prometheus-blackbox-exporter: + enabled: false \ No newline at end of file diff --git a/sregym/observer/tidb_cluster_deploy_helper.py b/sregym/observer/tidb_cluster_deploy_helper.py new file mode 100644 index 0000000..2797d28 --- /dev/null +++ b/sregym/observer/tidb_cluster_deploy_helper.py @@ -0,0 +1,21 @@ +import os +from pathlib import Path + +from sregym.service.apps.tidb_cluster_operator import TiDBClusterDeployer + + +class TiDBClusterDeployHelper: + _ready = False + + @classmethod + def running_cluster(self): + if not self._ready: + base_dir = Path(__file__).parent.parent + meta_path = base_dir / "service" / "metadata" / "tidb_metadata.json" + print("Starting TiDB Cluster...") + deployer = TiDBClusterDeployer(str(meta_path)) + deployer.deploy_all() + self._ready = True + print("TiDB Cluster is running.") + else: + print("TiDB Cluster is already running.") diff --git a/sregym/observer/tidb_prometheus.py b/sregym/observer/tidb_prometheus.py new file mode 100644 index 0000000..9e829e2 --- /dev/null +++ b/sregym/observer/tidb_prometheus.py @@ -0,0 +1,81 @@ +import json +import os +import shlex +import subprocess + +from sregym.service.telemetry.prometheus import Prometheus + +FLEETCAST_NS = "fleetcast" +FLEETCAST_DEP = "fleetcast-satellite-app-backend" +FLEETCAST_METRICS_PORT = "5000" + + +def run_cmd(cmd, capture=False): + if isinstance(cmd, (list, tuple)): + cmd = [str(c) for c in cmd] + printable = " ".join(shlex.quote(c) for c in cmd) + else: + printable = cmd + print("Running:", printable) + + if capture: + return subprocess.check_output(cmd, text=True, stderr=subprocess.STDOUT).strip() + else: + subprocess.run(cmd, check=True) + + +# def check_prometheus_targets(prom: Prometheus): +# # Prefer env override, fallback to 9090 (your manual port-forward), then prom.port +# port = os.environ.get("PROMETHEUS_PORT") or 
"9090" or str(prom.port) +# base_url = f"http://localhost:{port}" +# print(f"[debug] Checking Prometheus at {base_url}") + +# out = run_cmd(["curl", "-s", f"{base_url}/api/v1/targets"], capture=True) +# try: +# j = json.loads(out) +# targets = j.get("data", {}).get("activeTargets", []) +# for t in targets: +# print("Target:", t["labels"].get("job"), t["health"]) +# print(f"[done] listed {len(targets)} targets") +# except Exception: +# print(out) + + +def ensure_fleetcast_scrape_annotations(): + """Ensure FleetCast backend is annotated for Prometheus scraping.""" + expected = { + "prometheus.io/scrape": "true", + "prometheus.io/path": "/metrics", + "prometheus.io/port": FLEETCAST_METRICS_PORT, + } + + patch = {"spec": {"template": {"metadata": {"annotations": expected}}}} + run_cmd( + ["kubectl", "-n", FLEETCAST_NS, "patch", "deploy", FLEETCAST_DEP, "--type=merge", "-p", json.dumps(patch)], + capture=False, + ) + + print(f"[rollout] restarting {FLEETCAST_DEP}…") + run_cmd(["kubectl", "-n", FLEETCAST_NS, "rollout", "restart", f"deploy/{FLEETCAST_DEP}"], capture=False) + run_cmd(["kubectl", "-n", FLEETCAST_NS, "rollout", "status", f"deploy/{FLEETCAST_DEP}"], capture=False) + + print(f"[done] scrape annotations applied and rollout complete for {FLEETCAST_DEP}") + + +def main(): + prom = Prometheus() + + if not prom._is_prometheus_running(): + prom.deploy() + else: + print("Prometheus already running, skipping deploy.") + + # check_prometheus_targets(prom) + + ensure_fleetcast_scrape_annotations() + + # prom.stop_port_forward() + + +if __name__ == "__main__": + main() diff --git a/sregym/observer/trace_api.py b/sregym/observer/trace_api.py new file mode 100644 index 0000000..f78b28d --- /dev/null +++ b/sregym/observer/trace_api.py @@ -0,0 +1,365 @@ +import os +import select +import socket +import subprocess +import threading +import time +from datetime import datetime +from typing import List, Optional + +import pandas as pd +import requests +import logging +class TraceAPI: + """ + Jaeger HTTP API helper. 
+ + - For most apps: + * Prefer NodePort on svc/jaeger + * Otherwise port-forward svc/jaeger :16686 + * Base URL: http://127.0.0.1: + + - For Astronomy Shop: + * Jaeger is exposed THROUGH the frontend proxy on 8080, under /jaeger + * Prefer NodePort on svc/frontend-proxy + * Otherwise port-forward svc/frontend-proxy :8080 + * Base URL: http://127.0.0.1:/jaeger + """ + + _instance_lock: threading.Lock + + def __init__(self, namespace: str, prefer_nodeport: bool = True, pf_ready_sleep: float = 2.0): + self.namespace = namespace + self.prefer_nodeport = prefer_nodeport + self.pf_ready_sleep = pf_ready_sleep + + self.port_forward_process: Optional[subprocess.Popen] = None + self.local_port: Optional[int] = None + self.stop_event = threading.Event() + self.output_threads: List[threading.Thread] = [] + self._instance_lock = threading.Lock() + + # Decide service/port/prefix based on namespace + self._is_astronomy = self.namespace == "astronomy-shop" + self._svc_name = "frontend-proxy" if self._is_astronomy else "jaeger" + self._remote_port = "8080" if self._is_astronomy else "16686" + self._url_prefix = "/jaeger/ui" if self._is_astronomy else "" + + self.local_logger = logging.getLogger("all.infra.trace_api") + self.local_logger.propagate = True + self.local_logger.setLevel(logging.DEBUG) + + # Choose access path: NodePort (if available) else port-forward + node_port = None + if self.prefer_nodeport: + node_port = self.get_nodeport(self._svc_name, self.namespace) + + if node_port: + # Use NodePort directly + self.base_url = f"http://localhost:{node_port}{self._url_prefix}" + self.using_port_forward = False + self._export_env(port=node_port) # <<< ensure env set for NodePort (incl. astronomy-shop) + else: + # Fall back to port-forward on a free local port + self.using_port_forward = True + self.start_port_forward() # sets base_url and env + + # ------------------------ + # Cluster discovery helpers + # ------------------------ + + def get_nodeport(self, service_name: str, namespace: str) -> Optional[str]: + """Return NodePort string if present; otherwise None.""" + try: + result = subprocess.check_output( + [ + "kubectl", + "get", + "service", + service_name, + "-n", + namespace, + "-o", + "jsonpath={.spec.ports[0].nodePort}", + ], + text=True, + ).strip() + if result: + self.local_logger.info(f"NodePort for service {service_name}: {result}") + return result + return None + except subprocess.CalledProcessError as e: + msg = (e.output or "").strip() + if msg: + self.local_logger.error(f"Error getting NodePort: {msg}") + return None + + def get_jaeger_pod_name(self) -> str: + """Resolve the Jaeger pod name (if you ever need pod forwarding).""" + try: + result = subprocess.check_output( + [ + "kubectl", + "get", + "pods", + "-n", + self.namespace, + "-l", + "app.kubernetes.io/name=jaeger", + "-o", + "jsonpath={.items[0].metadata.name}", + ], + text=True, + ) + name = result.strip() + if not name: + raise RuntimeError("No Jaeger pods found") + return name + except subprocess.CalledProcessError as e: + raise RuntimeError(f"Error getting Jaeger pod name: {e.output}") from e + + # ------------------------ + # Port-forward management + # ------------------------ + + @staticmethod + def _pick_free_port() -> int: + """Pick a free local TCP port.""" + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.bind(("", 0)) + return s.getsockname()[1] + + def _build_pf_cmd(self, local_port: int) -> list: + """ + Build a kubectl port-forward command that binds only to 127.0.0.1. 
+ """ + target = f"svc/{self._svc_name}" + return [ + "kubectl", + "-n", + self.namespace, + "port-forward", + target, + f"{local_port}:{self._remote_port}", + "--address", + "127.0.0.1", + ] + + def _print_output(self, stream): + """Non-blocking reader for subprocess stdout/stderr.""" + while not self.stop_event.is_set(): + if self.port_forward_process and self.port_forward_process.poll() is not None: + break + try: + ready, _, _ = select.select([stream], [], [], 0.1) + except (ValueError, OSError): + break + if ready: + line = stream.readline() + if line: + self.local_logger.info(line.rstrip()) + else: + break + + def start_port_forward(self): + """Start kubectl port-forward exactly once; idempotent.""" + with self._instance_lock: + if self.port_forward_process and self.port_forward_process.poll() is None: + return + + self.local_port = self._pick_free_port() + cmd = self._build_pf_cmd(self.local_port) + + msg = "Starting port-forward with command:" + " ".join(cmd) + self.local_logger.info(msg) + self.port_forward_process = subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + + if self.port_forward_process.stdout: + t_out = threading.Thread( + target=self._print_output, args=(self.port_forward_process.stdout,), daemon=True + ) + t_out.start() + self.output_threads.append(t_out) + if self.port_forward_process.stderr: + t_err = threading.Thread( + target=self._print_output, args=(self.port_forward_process.stderr,), daemon=True + ) + t_err.start() + self.output_threads.append(t_err) + + # Let kubectl set up the tunnel + time.sleep(self.pf_ready_sleep) + + if self.port_forward_process and self.port_forward_process.poll() is None: + self.local_logger.info(f"Port forwarding established successfully on") + self.base_url = f"http://127.0.0.1:{self.local_port}{self._url_prefix}" + self._export_env(port=self.local_port) # <<< ensure env set for PF case (incl. 
astronomy-shop) + else: + raise RuntimeError("Port forwarding failed to start") + + def stop_port_forward(self): + """Terminate kubectl and close streams.""" + with self._instance_lock: + if not self.port_forward_process: + return + + self.stop_event.set() + try: + self.port_forward_process.terminate() + self.port_forward_process.wait(timeout=5) + except Exception as e: + self.local_logger.error(f"Error terminating port-forward process:", e) + try: + self.port_forward_process.kill() + except Exception: + pass + + try: + if self.port_forward_process.stdout: + self.port_forward_process.stdout.close() + if self.port_forward_process.stderr: + self.port_forward_process.stderr.close() + except Exception as e: + self.local_logger.error(f"Error closing process streams:", e) + + self.port_forward_process = None + self.local_port = None + + for t in self.output_threads: + t.join(timeout=2) + self.output_threads.clear() + self.local_logger.info("Port-forward stopped.") + + def cleanup(self): + """Public cleanup (safe to call multiple times).""" + if self.using_port_forward: + self.stop_port_forward() + self.local_logger.info("Cleanup completed.") + + # ------------------------ + # Environment export + # ------------------------ + + def _export_env(self, port: int): + """ + Standardize env for downstream tools: + - JAEGER_PORT: the local port to reach Jaeger (NodePort or PF) + - JAEGER_BASE_URL: full base URL (includes prefix for astronomy-shop) + """ + os.environ["JAEGER_PORT"] = str(port) + os.environ["JAEGER_BASE_URL"] = self.base_url + + # ------------------------ + # Jaeger API wrappers + # ------------------------ + + @staticmethod + def _api_headers(): + # Some proxies are picky; be explicit about JSON. + return {"Accept": "application/json"} + + def get_services(self) -> List[str]: + """Fetch list of service names known to Jaeger.""" + url = f"{self.base_url}/api/services" + try: + resp = requests.get(url, headers=self._api_headers(), timeout=10) + resp.raise_for_status() + data = resp.json() + return data.get("data", []) or [] + except Exception as e: + self.local_logger.error(f"Failed to get services: {e}") + return [] + + def get_traces( + self, service_name: str, start_time: datetime, end_time: datetime, limit: Optional[int] = None + ) -> list: + """ + Fetch traces for a service between start_time and end_time. + Jaeger HTTP API supports lookback + optional limit. + """ + lookback_sec = int((datetime.now() - start_time).total_seconds()) + url = f"{self.base_url}/api/traces?service={service_name}&lookback={lookback_sec}s" + if limit is not None: + url += f"&limit={limit}" + + try: + resp = requests.get(url, headers=self._api_headers(), timeout=15) + resp.raise_for_status() + return resp.json().get("data", []) or [] + except Exception as e: + self.local_logger.error(f"Failed to get traces for {service_name}: {e}") + return [] + + def extract_traces(self, start_time: datetime, end_time: datetime, limit: Optional[int] = None) -> list: + """ + Extract traces across all services (except utility ones) in the time range. + Automatically calls cleanup() when done. 
+ """ + try: + services = self.get_services() + self.local_logger.info(f"services: {services}") + all_traces = [] + if not services: + self.local_logger.error("No services found.") + return all_traces + + for svc in services: + if svc == "jaeger-all-in-one": + continue + traces = self.get_traces(svc, start_time, end_time, limit=limit) + for trace in traces: + # Normalize serviceName into spans for easier downstream processing + proc_map = trace.get("processes", {}) + for span in trace.get("spans", []): + span["serviceName"] = proc_map.get(span.get("processID"), {}).get("serviceName", "unknown") + all_traces.append(trace) + return all_traces + finally: + self.cleanup() + + def process_traces(self, traces: list) -> pd.DataFrame: + """Flatten raw Jaeger traces into a DataFrame.""" + rows = [] + for trace in traces: + tid = trace.get("traceID") + for span in trace.get("spans", []): + parent_span = "ROOT" + for ref in span.get("references", []): + if ref.get("refType") == "CHILD_OF": + parent_span = ref.get("spanID") + break + + has_error = False + response = "Unknown" + for tag in span.get("tags", []): + if tag.get("key") == "error" and bool(tag.get("value")): + has_error = True + if tag.get("key") in ("http.status_code", "response_class"): + response = tag.get("value") + + rows.append( + { + "trace_id": tid, + "span_id": span.get("spanID"), + "parent_span": parent_span, + "service_name": span.get("serviceName"), + "operation_name": span.get("operationName"), + "start_time": span.get("startTime"), + "duration": span.get("duration"), + "has_error": has_error, + "response": response, + } + ) + + return pd.DataFrame(rows) + + def save_traces(self, df: pd.DataFrame, path: str) -> str: + os.makedirs(path, exist_ok=True) + file_path = os.path.join(path, f"traces_{int(time.time())}.csv") + df.to_csv(file_path, index=False) + return f"Traces data exported to: {file_path}" diff --git a/sregym/paths.py b/sregym/paths.py new file mode 100644 index 0000000..b6d7f65 --- /dev/null +++ b/sregym/paths.py @@ -0,0 +1,30 @@ +import os +from pathlib import Path + +HOME_DIR = Path(os.path.expanduser("~")) +BASE_DIR = Path(__file__).resolve().parent +BASE_PARENT_DIR = Path(__file__).resolve().parent.parent + +# Targe microservice and its utilities directories +TARGET_MICROSERVICES = BASE_PARENT_DIR / "SREGym-applications" + +# Cache directories +CACHE_DIR = HOME_DIR / "cache_dir" +LLM_CACHE_FILE = CACHE_DIR / "llm_cache.json" + +# Fault scripts +FAULT_SCRIPTS = BASE_DIR / "generators" / "fault" / "script" + +# Metadata files +SOCIAL_NETWORK_METADATA = BASE_DIR / "service" / "metadata" / "social-network.json" +HOTEL_RES_METADATA = BASE_DIR / "service" / "metadata" / "hotel-reservation.json" +PROMETHEUS_METADATA = BASE_DIR / "service" / "metadata" / "prometheus.json" +TRAIN_TICKET_METADATA = BASE_DIR / "service" / "metadata" / "train-ticket.json" +ASTRONOMY_SHOP_METADATA = BASE_DIR / "service" / "metadata" / "astronomy-shop.json" +TIDB_METADATA = BASE_DIR / "service" / "metadata" / "tidb-with-operator.json" +FLIGHT_TICKET_METADATA = BASE_DIR / "service" / "metadata" / "flight-ticket.json" +FLEET_CAST_METADATA = BASE_DIR / "service" / "metadata" / "fleet-cast.json" +BLUEPRINT_HOTEL_RES_METADATA = BASE_DIR / "service" / "metadata" / "blueprint-hotel-reservation.json" + +# Khaos DaemonSet +KHAOS_DS = BASE_DIR / "service" / "khaos.yaml" diff --git a/sregym/resources/trainticket/locust-deployment.yaml b/sregym/resources/trainticket/locust-deployment.yaml new file mode 100644 index 0000000..b1326ac --- /dev/null +++ 
b/sregym/resources/trainticket/locust-deployment.yaml @@ -0,0 +1,72 @@ +apiVersion: v1 +kind: Service +metadata: + name: load-generator + namespace: train-ticket + labels: + app: load-generator +spec: + selector: + app: load-generator + ports: + - name: web + port: 8089 + targetPort: 8089 + nodePort: 30089 + type: NodePort +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: load-generator + namespace: train-ticket + labels: + app: load-generator +spec: + replicas: 1 + selector: + matchLabels: + app: load-generator + template: + metadata: + labels: + app: load-generator + spec: + containers: + - name: load-generator + image: locustio/locust:2.14.2 + ports: + - containerPort: 8089 + name: web + env: + - name: LOCUST_AUTOSTART + value: "true" + - name: LOCUST_USERS + value: "5" + - name: LOCUST_SPAWN_RATE + value: "1" + - name: LOCUST_HEADLESS + value: "false" + - name: LOCUST_WEB_HOST + value: "0.0.0.0" + - name: LOCUST_WEB_PORT + value: "8089" + - name: LOCUST_HOST + value: "http://ts-ui-dashboard:8080" + args: + - "--locustfile=/usr/src/app/locustfile.py" + volumeMounts: + - name: locustfile + mountPath: /usr/src/app/locustfile.py + subPath: locustfile.py + resources: + requests: + memory: "256Mi" + cpu: "100m" + limits: + memory: "512Mi" + cpu: "500m" + volumes: + - name: locustfile + configMap: + name: locustfile-config diff --git a/sregym/resources/trainticket/locustfile.py b/sregym/resources/trainticket/locustfile.py new file mode 100644 index 0000000..910a3b3 --- /dev/null +++ b/sregym/resources/trainticket/locustfile.py @@ -0,0 +1,250 @@ +import time +from locust import HttpUser, task, between +import requests + + +class TrainTicketUser(HttpUser): + wait_time = between(1, 2) + + def on_start(self): + self.client.verify = False + self.last_login_time = 0 + self.login_interval = 1800 # 30 minutes in seconds + self._login() + + def _login(self): + + current_time = time.time() + self.last_login_time = current_time + + response = self.client.post( + "/api/v1/users/login", + json={"username": "fdse_microservice", "password": "111111"}, + headers={"Content-Type": "application/json"}, + name="/users/login", + ) + if response.status_code == 200: + data = response.json() + self.token = data.get("data", {}).get("token", "") + self.user_id = data.get("data", {}).get("userId", "") + self.headers = {"Authorization": f"Bearer {self.token}", "Content-Type": "application/json"} + print(f"[Login] Successfully logged in at {current_time}, token: {self.token[:20]}...") + else: + print(f"[Login] Failed: {response.status_code}") + self.token = "" + self.user_id = "" + self.headers = {"Content-Type": "application/json"} + + # The JWT token is valid for 1 hours, so we need to refresh it if it's expired. 
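For debugging the authentication flow outside Locust, the same login call can be exercised with a plain `requests` script. The sketch below mirrors `_login` above; the base URL is an assumption (it presumes a local port-forward such as `kubectl -n train-ticket port-forward svc/ts-ui-dashboard 8080:8080`) and is not part of this patch.

```python
# Minimal sketch of the train-ticket login flow used by the locustfile above.
# Assumption: ts-ui-dashboard is reachable on localhost:8080 (e.g. via a port-forward).
import requests

BASE_URL = "http://localhost:8080"  # hypothetical local endpoint, not defined by this patch

resp = requests.post(
    f"{BASE_URL}/api/v1/users/login",
    json={"username": "fdse_microservice", "password": "111111"},
    headers={"Content-Type": "application/json"},
    timeout=10,
)
resp.raise_for_status()
data = resp.json().get("data") or {}
print("token:", data.get("token", "")[:20], "... userId:", data.get("userId", ""))
```

If the call succeeds, the returned token is the same JWT the Locust user attaches as a `Bearer` header, which is why the periodic refresh handled next matters.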
+ def _check_and_refresh_token(self): + current_time = time.time() + if current_time - self.last_login_time > self.login_interval: + print(f"[Token] Refreshing token after {current_time - self.last_login_time:.0f} seconds") + self._login() + + def _get_existing_order_id(self): + if not getattr(self, "user_id", None): + return None + + payload = {"loginId": self.user_id} + + # Primary: ts-order-service refresh (POST) + try: + resp = self.client.post( + "/api/v1/orderservice/order/refresh", + json=payload, + headers=self.headers, + name="/orders/refresh", + ) + if resp.status_code == 200: + data = resp.json() + orders = data.get("data", []) + if orders: + first = orders[0] if isinstance(orders, list) else orders + oid = first.get("id") or first.get("orderId") + if oid: + return oid + else: + print(f"Orderservice refresh failed: {resp.status_code} {resp.text[:200]}") + except Exception as e: + print(f"Error calling orderservice refresh: {e}") + return None + + @task(1) + def test_fault_17_voucher_slow(self): + """Test F-17: slow DB due to nested SELECTs via direct voucher service call. + Expected: + - F-17 ON: /getVoucher takes >5s and times out -> failure + - F-17 OFF: /getVoucher returns quickly (<5s) -> success + """ + if not getattr(self, "headers", None): + return + + order_id = self._get_existing_order_id() + if not order_id: + return + + payload = {"orderId": order_id, "type": 1} + start = time.time() + + try: + with self.client.post( + "http://ts-voucher-service:16101/getVoucher", + json=payload, + headers={"Content-Type": "application/json"}, + name="/getVoucher (F17)", + timeout=5, + catch_response=True, + ) as response: + elapsed = time.time() - start + print(f"[F17] /getVoucher status={response.status_code} elapsed={elapsed:.2f}s") + + if response.status_code == 200: + print(f"[F17] SUCCESS: Voucher retrieved in {elapsed:.2f}s | response: {response.text}") + response.success() + else: + print(f"[F17] FAILURE: Status {response.status_code} in {elapsed:.2f}s") + response.failure(f"[F17] Voucher service failed to retrieve voucher. Error: {response.text}. Elapsed: {elapsed:.2f}s") + + except requests.exceptions.ReadTimeout as e: + # F-17 ON: Voucher service sleeps for 10s, causing >5s timeout + elapsed = time.time() - start + print(f"[F17] /getVoucher timed out after {elapsed:.2f}s (F17 ON - expected behavior!): {e}") + + except Exception as e: + # Other errors + elapsed = time.time() - start + print(f"[F17] /getVoucher error after {elapsed:.2f}s: {e}") + + @task(1) + def test_fault_22_sql_column_missing(self): + """Test F-22: SQL column missing error in contacts service. 
+ Expected: + - F-22 ON: Contact creation fails with SQL column missing error -> status 0 + - F-22 OFF: Contact creation succeeds -> status 1 + """ + if not getattr(self, "headers", None): + return + + self._check_and_refresh_token() + + import uuid + unique_name = f"TestContact_{uuid.uuid4().hex[:8]}" + + # Create contact payload + contact_payload = { + "name": unique_name, + "accountId": self.user_id, + "documentType": 1, + "documentNumber": unique_name, + "phoneNumber": f"555-{unique_name[-4:]}" + } + + print(f"[F22] Testing contact creation: {unique_name}") + + try: + # Create contact + with self.client.post( + "/api/v1/contactservice/contacts", + json=contact_payload, + headers=self.headers, + name="/contacts/create (F22)", + catch_response=True, + ) as response: + + if response.status_code == 201: + data = response.json() + status = data.get("status", -1) + msg = data.get("msg", "") + + if status == 1: + print(f"[F22] SUCCESS: Contact created successfully | status: {status} | msg: {msg}") + print(f"[F22] Contact data: {data.get('data', {})}") + + # Clean up: Delete the contact to avoid crowding the list + contact_id = data.get("data", {}).get("id") + if contact_id: + try: + delete_response = self.client.delete( + f"/api/v1/contactservice/contacts/{contact_id}", + headers=self.headers, + name="/contacts/delete", + ) + if delete_response.status_code == 200: + print(f"[F22] Cleanup: Contact {contact_id} deleted successfully") + else: + print(f"[F22] Cleanup: Failed to delete contact {contact_id}, status: {delete_response.status_code}") + except Exception as e: + print(f"[F22] Cleanup: Error deleting contact {contact_id}: {e}") + + response.success() + + elif status == 0: + print(f"[F22] FAILURE: Contact creation failed | status: {status} | msg: {msg}") + response.failure(f"[F22] Contact creation failed: {msg}") + else: + print(f"[F22] UNKNOWN: Unexpected status {status} | msg: {msg}") + response.failure(f"[F22] Contact creation returned unexpected status: {status}") + + else: + print(f"[F22] HTTP ERROR: Status {response.status_code} | Response: {response.text}") + response.failure(f"[F22] Contact creation HTTP error: {response.status_code}") + + except Exception as e: + print(f"[F22] EXCEPTION: Error during contact creation: {e}") + # Can't call response.failure() here since response is not in scope + print(f"[F22] Exception details: {e}") + + @task(2) + def get_routes(self): + """ + Get all available train routes. + """ + if not getattr(self, "headers", None): + return + + self._check_and_refresh_token() + + try: + response = self.client.get( + "/api/v1/routeservice/routes", + headers=self.headers, + name="/routes/get", + ) + + if response.status_code == 200: + data = response.json() + routes_count = len(data.get("data", [])) if isinstance(data.get("data"), list) else 0 + # print(f"[Routes] Successfully retrieved {routes_count} routes") + else: + print(f"[Routes] Failed to get routes: {response.status_code}") + + except Exception as e: + print(f"[Routes] Error getting routes: {e}") + + @task(2) + def get_stations(self): + """ + Get all available train stations. 
+ """ + if not getattr(self, "headers", None): + return + + self._check_and_refresh_token() + + try: + response = self.client.get( + "/api/v1/stationservice/stations", + headers=self.headers, + name="/stations/get", + ) + + if response.status_code == 200: + data = response.json() + stations_count = len(data.get("data", [])) if isinstance(data.get("data"), list) else 0 + # print(f"[Stations] Successfully retrieved {stations_count} stations") + else: + print(f"[Stations] Failed to get stations: {response.status_code}") + + except Exception as e: + print(f"[Stations] Error getting stations: {e}") diff --git a/sregym/service/__init__.py b/sregym/service/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/sregym/service/apps/app_registry.py b/sregym/service/apps/app_registry.py new file mode 100644 index 0000000..e16d06b --- /dev/null +++ b/sregym/service/apps/app_registry.py @@ -0,0 +1,57 @@ +import json + +from sregym.paths import * +from sregym.service.apps.astronomy_shop import AstronomyShop +from sregym.service.apps.fleet_cast import FleetCast +from sregym.service.apps.flight_ticket import FlightTicket +from sregym.service.apps.hotel_reservation import HotelReservation +from sregym.service.apps.social_network import SocialNetwork +from sregym.service.apps.blueprint_hotel_reservation import BlueprintHotelReservation +from sregym.service.helm import Helm + +# from sregym.service.apps.train_ticket import TrainTicket + + +class AppRegistry: + def __init__(self): + self.APP_REGISTRY = { + "Astronomy Shop": AstronomyShop, + # "Flight Ticket": FlightTicket, + "Hotel Reservation": HotelReservation, + "Social Network": SocialNetwork, + # "Train Ticket": TrainTicket + "Fleet Cast": FleetCast, + "Blueprint Hotel Reservation": BlueprintHotelReservation + } + + self.APP_PATH = { + "Astronomy Shop": ASTRONOMY_SHOP_METADATA, + # "Flight Ticket": FLIGHT_TICKET_METADATA, + "Hotel Reservation": HOTEL_RES_METADATA, + "Social Network": SOCIAL_NETWORK_METADATA, + # "Train Ticket": TRAIN_TICKET_METADATA + "Fleet Cast": FLEET_CAST_METADATA, + "Blueprint Hotel Reservation": BLUEPRINT_HOTEL_RES_METADATA + } + + def get_app_instance(self, app_name: str): + if app_name not in self.APP_REGISTRY: + raise ValueError(f"App name {app_name} not found in registry.") + + return self.APP_REGISTRY.get(app_name)() + + def get_app_names(self): + return list(self.APP_REGISTRY.keys()) + + def get_app_config_file(self, app_name: str): + if app_name not in self.APP_PATH: + raise ValueError(f"App name {app_name} not found in registry.") + + return self.APP_PATH.get(app_name) + + def get_app_metadata(self, app_name: str): + config_file = self.get_app_config_file(app_name) + with open(config_file, "r") as file: + metadata = json.load(file) + + return metadata diff --git a/sregym/service/apps/astronomy_shop.py b/sregym/service/apps/astronomy_shop.py new file mode 100644 index 0000000..ce7b19e --- /dev/null +++ b/sregym/service/apps/astronomy_shop.py @@ -0,0 +1,77 @@ +"""Interface to the OpenTelemetry Astronomy Shop application""" + +import time + +from sregym.generators.workload.locust import LocustWorkloadManager +from sregym.observer.trace_api import TraceAPI +from sregym.paths import ASTRONOMY_SHOP_METADATA +from sregym.service.apps.base import Application +from sregym.service.helm import Helm +from sregym.service.kubectl import KubeCtl + + +class AstronomyShop(Application): + def __init__(self): + super().__init__(ASTRONOMY_SHOP_METADATA) + self.load_app_json() + self.kubectl = KubeCtl() + self.trace_api = None + 
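+        # Create the application's namespace up front so the Helm install and Jaeger port-forward in deploy() have a target.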
self.create_namespace() + + def load_app_json(self): + super().load_app_json() + metadata = self.get_app_json() + self.app_name = metadata["Name"] + self.description = metadata["Desc"] + self.frontend_service = "frontend-proxy" + self.frontend_port = 8080 + + def deploy(self): + """Deploy the Helm configurations.""" + self.kubectl.create_namespace_if_not_exist(self.namespace) + + self.helm_configs["extra_args"] = [ + "--set-string", + "components.load-generator.envOverrides[0].name=LOCUST_BROWSER_TRAFFIC_ENABLED", + "--set-string", + "components.load-generator.envOverrides[0].value=false", + ] + + Helm.install(**self.helm_configs) + Helm.assert_if_deployed(self.helm_configs["namespace"]) + self.trace_api = TraceAPI(self.namespace) + self.trace_api.start_port_forward() + + def delete(self): + """Delete the Helm configurations.""" + Helm.uninstall(**self.helm_configs) + self.kubectl.delete_namespace(self.helm_configs["namespace"]) + self.kubectl.wait_for_namespace_deletion(self.namespace) + + def cleanup(self): + if self.trace_api: + self.trace_api.stop_port_forward() + Helm.uninstall(**self.helm_configs) + self.kubectl.delete_namespace(self.helm_configs["namespace"]) + + if hasattr(self, "wrk"): + # self.wrk.stop() + self.kubectl.delete_job(label="job=workload", namespace=self.namespace) + + def create_workload(self): + self.wrk = LocustWorkloadManager( + namespace=self.namespace, + locust_url="load-generator:8089", + ) + + def start_workload(self): + if not hasattr(self, "wrk"): + self.create_workload() + self.wrk.start() + + +# Run this code to test installation/deletion +# if __name__ == "__main__": +# shop = AstronomyShop() +# shop.deploy() +# shop.delete() diff --git a/sregym/service/apps/base.py b/sregym/service/apps/base.py new file mode 100644 index 0000000..6a78309 --- /dev/null +++ b/sregym/service/apps/base.py @@ -0,0 +1,81 @@ +import json + +from sregym.paths import TARGET_MICROSERVICES +import logging + +class Application: + """Base class for all microservice applications.""" + + def __init__(self, config_file: str): + self.config_file = config_file + self.name = None + self.namespace = None + self.helm_deploy = True + self.helm_configs = {} + self.k8s_deploy_path = None + self.local_logger = logging.getLogger("all.application") + self.local_logger.propagate = True + self.local_logger.setLevel(logging.DEBUG) + + def load_app_json(self): + """Load (basic) application metadata into attributes. + + # NOTE: override this method to load additional attributes! + """ + with open(self.config_file, "r") as file: + metadata = json.load(file) + + self.name = metadata["Name"] + self.namespace = metadata["Namespace"] + if "Helm Config" in metadata: + self.helm_configs = metadata["Helm Config"] + chart_path = self.helm_configs.get("chart_path") + + if chart_path and not self.helm_configs.get("remote_chart", False): + self.helm_configs["chart_path"] = str(TARGET_MICROSERVICES / chart_path) + + if "K8S Deploy Path" in metadata: + self.k8s_deploy_path = TARGET_MICROSERVICES / metadata["K8S Deploy Path"] + + def get_app_json(self) -> dict: + """Get application metadata in JSON format. + + Returns: + dict: application metadata + """ + with open(self.config_file, "r") as file: + app_json = json.load(file) + return app_json + + def get_app_summary(self) -> str: + """Get a summary of the application metadata in string format. + NOTE: for human and LLM-readable summaries! 
+ + Returns: + str: application metadata + """ + app_json = self.get_app_json() + app_name = app_json.get("Name", "") + namespace = app_json.get("Namespace", "") + desc = app_json.get("Desc", "") + supported_operations = app_json.get("Supported Operations", []) + operations_str = "\n".join([f" - {op}" for op in supported_operations]) + + description = f"App Name: {app_name}\nNamespace: {namespace}\nDescription: {desc}\nSupported Operations:\n{operations_str}" + + return description + + def create_namespace(self): + """Create the namespace for the application if it doesn't exist.""" + result = self.kubectl.exec_command(f"kubectl get namespace {self.namespace}") + if "notfound" in result.lower(): + self.local_logger.info(f"Namespace {self.namespace} not found. Creating namespace.") + create_namespace_command = f"kubectl create namespace {self.namespace}" + create_result = self.kubectl.exec_command(create_namespace_command) + self.local_logger.info(f"Namespace {self.namespace} created successfully: {create_result}") + else: + self.local_logger.info(f"Namespace {self.namespace} already exists.") + + def cleanup(self): + """Delete the entire namespace for the application.""" + self.kubectl.delete_namespace(self.namespace) diff --git a/sregym/service/apps/blueprint_hotel_reservation.py b/sregym/service/apps/blueprint_hotel_reservation.py new file mode 100644 index 0000000..1aea8ec --- /dev/null +++ b/sregym/service/apps/blueprint_hotel_reservation.py @@ -0,0 +1,66 @@ +import time +import logging +from sregym.generators.workload.blueprint_hotel_work import BHotelWrk, BHotelWrkWorkloadManager +from sregym.observer.trace_api import TraceAPI +from sregym.paths import BLUEPRINT_HOTEL_RES_METADATA, FAULT_SCRIPTS, TARGET_MICROSERVICES +from sregym.service.apps.base import Application +from sregym.service.kubectl import KubeCtl + + +local_logger = logging.getLogger("all.application") +local_logger.propagate = True +local_logger.setLevel(logging.DEBUG) + +class BlueprintHotelReservation(Application): + def __init__(self): + super().__init__(BLUEPRINT_HOTEL_RES_METADATA) + self.kubectl = KubeCtl() + self.trace_api = None + self.trace_api = None + self.script_dir = FAULT_SCRIPTS + self.helm_deploy = False + + self.load_app_json() + + def load_app_json(self): + super().load_app_json() + metadata = self.get_app_json() + self.app_name = metadata["Name"] + self.description = metadata["Desc"] + self.k8s_workload_job_path = TARGET_MICROSERVICES / metadata["K8S Workload Job Path"] + + def deploy(self): + """Deploy the Kubernetes configurations.""" + local_logger.info(f"Deploying Kubernetes configurations in namespace: {self.namespace}") + self.create_namespace() + self.kubectl.apply_configs(self.namespace, self.k8s_deploy_path) + self.kubectl.wait_for_ready(self.namespace) + self.trace_api = TraceAPI(self.namespace) + self.trace_api.start_port_forward() + + def delete(self): + """Delete the configmap.""" + self.kubectl.delete_configs(self.namespace, self.k8s_deploy_path) + """Delete the workload job if exists""" + self.kubectl.delete_configs(self.namespace, self.k8s_workload_job_path) + + def cleanup(self): + """Delete the entire namespace for the hotel reservation application.""" + if self.trace_api: + self.trace_api.stop_port_forward() + self.kubectl.delete_namespace(self.namespace) + self.kubectl.wait_for_namespace_deletion(self.namespace) + self.kubectl.delete_job(label="job=workload", namespace=self.namespace) + + # helper methods + def _read_script(self, file_path: str) -> str: + with open(file_path, "r") 
as file: + return file.read() + + def create_workload(self, tput: int = None, duration: str = None, multiplier: int = None): + # The observation workload interface is in the problem class, keeping this interface empty to keep consistency in conductor + pass + + def start_workload(self): + # The observation workload interface is in the problem class, keeping this interface empty to keep consistency in conductor + pass diff --git a/sregym/service/apps/composite_app.py b/sregym/service/apps/composite_app.py new file mode 100644 index 0000000..4ee2b9d --- /dev/null +++ b/sregym/service/apps/composite_app.py @@ -0,0 +1,39 @@ +"""A class representing a composite of mulitple applications""" + +import json + +from sregym.paths import TARGET_MICROSERVICES +from sregym.service.apps.base import Application + + +class CompositeApp: + def __init__(self, apps: list[Application]): + self.namespace = "Multiple namespaces" + self.apps = {} + for app in apps: + if app.name in self.apps.keys(): + print(f"[CompositeApp] same app name: {app.name}, continue.") + continue + self.apps[app.name] = app + print(f"[CompositeApp] Apps: {self.apps}") + self.name = "CompositeApp" + self.app_name = "CompositeApp" + self.description = f"Composite application containing {len(self.apps)} apps: {', '.join(self.apps.keys())}" + + def deploy(self): + # FIXME: this can be optimized to parallel deploy later + for app in self.apps.values(): + print(f"[CompositeApp] Deploying {app.name}...") + app.deploy() + + def start_workload(self): + # FIXME: this can be optimized to parallel start later + for app in self.apps.values(): + print(f"[CompositeApp] Starting workload for {app.name}...") + app.start_workload() + + def cleanup(self): + # FIXME: this can be optimized to parallel cleanup later + for app in self.apps.values(): + print(f"[CompositeApp] Cleaning up {app.name}...") + app.cleanup() diff --git a/sregym/service/apps/fleet_cast.py b/sregym/service/apps/fleet_cast.py new file mode 100644 index 0000000..984ffbc --- /dev/null +++ b/sregym/service/apps/fleet_cast.py @@ -0,0 +1,271 @@ +"""Interface to the FleetCast application (Ingress-enabled on first install; robust readiness checks).""" + +import json +import os +import subprocess +import time +from pathlib import Path + +from sregym.generators.workload.locust import LocustWorkloadManager +from sregym.observer import tidb_prometheus +from sregym.observer.tidb_cluster_deploy_helper import TiDBClusterDeployHelper +from sregym.observer.logstash.jaeger.jaeger import Jaeger +from sregym.paths import FLEET_CAST_METADATA +from sregym.service.apps.base import Application +from sregym.service.apps.tidb_cluster_operator import TiDBClusterDeployer +from sregym.service.helm import Helm +from sregym.service.kubectl import KubeCtl + + +class FleetCast(Application): + def __init__(self): + super().__init__(FLEET_CAST_METADATA) + self.load_app_json() + self.kubectl = KubeCtl() + self.create_namespace() + + def _sh(self, cmd: str, check: bool = True, capture: bool = False) -> str: + """Run a shell command; supports capture.""" + print(f"$ {cmd}") + env = os.environ.copy() + env.setdefault("HELM_MAX_CHART_FILE_SIZE", "104857600") # 100 MiB + if capture: + return subprocess.check_output(cmd, shell=True, env=env).decode() + subprocess.run(cmd, shell=True, check=check, env=env) + return "" + + def load_app_json(self): + super().load_app_json() + metadata = self.get_app_json() + self.app_name = metadata["Name"] + self.description = metadata["Desc"] + + self.frontend_service = "satellite-app-frontend" 
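+        # Port 80 matches the servicePort that deploy() wires to the "/" Ingress path for the frontend.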
+ self.frontend_port = 80 + + def ensure_ingress_controller(self): + """Install nginx-ingress via Helm if missing, then wait until it's Running.""" + try: + out = self._sh("kubectl get pods -n ingress-nginx -o json", capture=True) + if len(json.loads(out).get("items", [])) == 0: + raise RuntimeError("ingress-nginx empty") + print("[ingress] ingress-nginx already present.") + except Exception: + self._sh("helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx || true") + self._sh("helm repo update") + self._sh( + "helm upgrade --install ingress-nginx ingress-nginx/ingress-nginx " + "-n ingress-nginx --create-namespace " + "--set controller.publishService.enabled=true" + ) + + print("[ingress] waiting for controller to be Running…") + for _ in range(60): + try: + phase = ( + self._sh( + "kubectl -n ingress-nginx get pods " + "-l app.kubernetes.io/name=ingress-nginx,app.kubernetes.io/component=controller " + "-o jsonpath='{.items[0].status.phase}'", + capture=True, + ) + .strip() + .strip("'") + ) + if phase == "Running": + print("[ingress] controller Running.") + return + except Exception: + pass + time.sleep(2) + raise RuntimeError("ingress-nginx controller did not become Ready in time.") + + def deploy(self): + """Deploy TiDB, then install FleetCast chart from repo with Ingress enabled on the first install.""" + self.kubectl.create_namespace_if_not_exist(self.namespace) + + self.ensure_ingress_controller() + + print("Deploying TiDB Cluster with Operator...") + base_dir = Path(__file__).parent.parent + meta_path = base_dir / "metadata" / "tidb_metadata.json" + TiDBClusterDeployHelper.running_cluster() + print("---DEPLOYED TiDB CLUSTER---") + + Helm.add_repo("fleetcast", "https://lilygn.github.io/FleetCast") + + release = self.helm_configs.get("release_name", "fleetcast") + fullname = f"{release}-satellite-app" + fe_svc = f"{fullname}-frontend" + be_svc = f"{fullname}-backend" + + ingress_args = [ + "--set-string", + "ingress.enabled=true", + "--set-string", + "ingress.className=nginx", + "--set-string", + "ingress.hosts[0].host=orbital.local", + "--set-string", + "ingress.hosts[0].paths[0].path=/", + "--set-string", + "ingress.hosts[0].paths[0].pathType=Prefix", + "--set-string", + f"ingress.hosts[0].paths[0].backend.serviceName={fe_svc}", + "--set", + "ingress.hosts[0].paths[0].backend.servicePort=80", + "--set-string", + "ingress.hosts[0].paths[1].path=/api", + "--set-string", + "ingress.hosts[0].paths[1].pathType=Prefix", + "--set-string", + f"ingress.hosts[0].paths[1].backend.serviceName={be_svc}", + "--set", + "ingress.hosts[0].paths[1].backend.servicePort=5000", + ] + + extra = self.helm_configs.get("extra_args", []) + if isinstance(extra, str): + extra = [extra] + self.helm_configs["extra_args"] = extra + ingress_args + + Helm.install(**self.helm_configs) + Helm.assert_if_deployed(self.helm_configs["namespace"]) + print("---DEPLOYED FLEETCAST---") + + self.wait_for_ingress_ready(ingress_name=f"{fullname}") + self._print_access_hints() + print("\n FleetCast deployment is complete and ready.") + tidb_prometheus.main() + print("PROMETHEUS: deployed TiDB monitoring stack.") + Jaeger().deploy() + + def _get_ingress_svc_info(self) -> dict: + """Return info about ingress-nginx-controller Service (type, external ip/hostname, nodePort for http/https).""" + svc_json = self._sh( + "kubectl -n ingress-nginx get svc ingress-nginx-controller -o json", + capture=True, + ) + data = json.loads(svc_json) + info = { + "type": data["spec"].get("type"), + "external_ip": None, + 
"external_hostname": None, + "http_nodeport": None, + "https_nodeport": None, + } + + lb = data.get("status", {}).get("loadBalancer", {}) + ing = lb.get("ingress") or [] + if ing: + info["external_ip"] = ing[0].get("ip") + info["external_hostname"] = ing[0].get("hostname") + + for p in data["spec"].get("ports", []): + name = p.get("name", "") + if name == "http" or p.get("port") == 80: + info["http_nodeport"] = p.get("nodePort") + if name == "https" or p.get("port") == 443: + info["https_nodeport"] = p.get("nodePort") + + return info + + def _print_access_hints(self): + """Print friendly URLs based on current ingress Service shape.""" + info = self._get_ingress_svc_info() + + if info["type"] == "LoadBalancer" and (info["external_ip"] or info["external_hostname"]): + host = info["external_ip"] or info["external_hostname"] + print(f"\n FleetCast should be reachable at: http://orbital.local/") + print(f" (map {host} to orbital.local in /etc/hosts)") + print(f" Backend health: http://orbital.local/api/health") + print(f" e.g., echo '{host} orbital.local' | sudo tee -a /etc/hosts") + return + + np = info["http_nodeport"] + if np: + print(f"\n FleetCast should be reachable at: http://orbital.local:{np}/") + print(f" Backend health: http://orbital.local:{np}/api/health") + print("If developing locally, add a hosts entry:") + print(" echo ' orbital.local' | sudo tee -a /etc/hosts") + print("Then open the URLs above (replace if you curl without the Host header).") + else: + print("\nℹ️ Could not detect NodePort for HTTP. Try a local forward:") + print(" sudo kubectl -n ingress-nginx port-forward svc/ingress-nginx-controller 80:80") + print(" # then use http://orbital.local/ after adding '127.0.0.1 orbital.local' to /etc/hosts") + + def wait_for_ingress_ready(self, ingress_name: str, timeout: int = 180): + """Wait for Ingress to exist with rules and for ALL backend Services referenced to have endpoints.""" + ns = self.helm_configs.get("namespace", self.namespace) + + t0 = time.time() + data = None + while time.time() - t0 < timeout: + try: + raw = self._sh(f"kubectl get ingress {ingress_name} -n {ns} -o json", capture=True) + data = json.loads(raw) + rules = data.get("spec", {}).get("rules", []) + if rules: + print("[ingress] Ingress has rules.") + break + except Exception: + pass + time.sleep(2) + else: + raise RuntimeError(f"Ingress {ingress_name} not created with rules in time.") + + backend_svcs = set() + for rule in data.get("spec", {}).get("rules", []): + http = rule.get("http", {}) + for p in http.get("paths", []): + svc = p.get("backend", {}).get("service", {}) + name = svc.get("name") + if name: + backend_svcs.add(name) + + if not backend_svcs: + raise RuntimeError("No backend services found in Ingress spec.") + + missing = [] + for svc in sorted(backend_svcs): + ok = False + t1 = time.time() + while time.time() - t1 < timeout: + try: + ed = json.loads(self._sh(f"kubectl get endpoints {svc} -n {ns} -o json", capture=True)) + if any(s.get("addresses") for s in ed.get("subsets", [])): + ok = True + break + except Exception: + pass + time.sleep(2) + if not ok: + missing.append(svc) + + if missing: + raise RuntimeError(f"Service endpoints missing for: {', '.join(missing)}") + + print("[ingress] All backend service endpoints are ready:", ", ".join(sorted(backend_svcs))) + + def delete(self): + """Delete the Helm configurations.""" + Helm.uninstall(**self.helm_configs) + self.kubectl.delete_namespace(self.helm_configs["namespace"]) + self.kubectl.wait_for_namespace_deletion(self.namespace) + + 
def cleanup(self): + Helm.uninstall(**self.helm_configs) + self.kubectl.delete_namespace(self.helm_configs["namespace"]) + if hasattr(self, "wrk"): + self.wrk.stop() + + def create_workload(self): + self.wrk = LocustWorkloadManager( + namespace=self.namespace, + locust_url="load-generator:8089", + ) + + def start_workload(self): + if not hasattr(self, "wrk"): + self.create_workload() + self.wrk.start() diff --git a/sregym/service/apps/flight_ticket.py b/sregym/service/apps/flight_ticket.py new file mode 100644 index 0000000..5296bed --- /dev/null +++ b/sregym/service/apps/flight_ticket.py @@ -0,0 +1,50 @@ +"""Interface to the Flight Ticket application""" + +import time + +from sregym.paths import FLIGHT_TICKET_METADATA, TARGET_MICROSERVICES +from sregym.service.apps.base import Application +from sregym.service.helm import Helm +from sregym.service.kubectl import KubeCtl + + +class FlightTicket(Application): + def __init__(self): + super().__init__(FLIGHT_TICKET_METADATA) + self.load_app_json() + self.kubectl = KubeCtl() + self.create_namespace() + + def load_app_json(self): + super().load_app_json() + metadata = self.get_app_json() + self.app_name = metadata["Name"] + self.description = metadata["Desc"] + self.frontend_service = None + self.frontend_port = None + + def deploy(self): + """Deploy the Helm configurations.""" + self.kubectl.create_namespace_if_not_exist(self.namespace) + Helm.add_repo( + "flight-ticket", + "https://xlab-uiuc.github.io/flight-ticket", + ) + Helm.install(**self.helm_configs) + Helm.assert_if_deployed(self.helm_configs["namespace"]) + + def delete(self): + """Delete the Helm configurations.""" + # NOTE: We should probably clear redis? + Helm.uninstall(**self.helm_configs) + time.sleep(30) + + def cleanup(self): + Helm.uninstall(**self.helm_configs) + self.kubectl.delete_namespace(self.helm_configs["namespace"]) + + +# if __name__ == "__main__": +# app = FlightTicket() +# app.deploy() +# app.delete() diff --git a/sregym/service/apps/helpers.py b/sregym/service/apps/helpers.py new file mode 100644 index 0000000..b893bc7 --- /dev/null +++ b/sregym/service/apps/helpers.py @@ -0,0 +1,8 @@ +from sregym.service.apps.base import Application +from sregym.service.kubectl import KubeCtl + + +def get_frontend_url(app: Application): + kubectl = KubeCtl() + endpoint = kubectl.get_cluster_ip(app.frontend_service, app.namespace) + return f"http://{endpoint}:{app.frontend_port}" diff --git a/sregym/service/apps/hotel_reservation.py b/sregym/service/apps/hotel_reservation.py new file mode 100644 index 0000000..bdd1b90 --- /dev/null +++ b/sregym/service/apps/hotel_reservation.py @@ -0,0 +1,152 @@ +import time +import logging +from sregym.generators.workload.wrk2 import Wrk2, Wrk2WorkloadManager +from sregym.observer.trace_api import TraceAPI +from sregym.paths import FAULT_SCRIPTS, HOTEL_RES_METADATA, TARGET_MICROSERVICES +from sregym.service.apps.base import Application +from sregym.service.apps.helpers import get_frontend_url +from sregym.service.kubectl import KubeCtl + + +local_logger = logging.getLogger("all.application") +local_logger.propagate = True +local_logger.setLevel(logging.DEBUG) + +class HotelReservation(Application): + def __init__(self): + super().__init__(HOTEL_RES_METADATA) + self.kubectl = KubeCtl() + self.trace_api = None + self.trace_api = None + self.script_dir = FAULT_SCRIPTS + self.helm_deploy = False + + self.load_app_json() + + self.payload_script = ( + TARGET_MICROSERVICES / "hotelReservation/wrk2/scripts/hotel-reservation/mixed-workload_type_1.lua" + ) 
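For orientation, a typical lifecycle for one of these `Application` subclasses looks like the sketch below. It assumes a reachable cluster with the hotel-reservation manifests from SREGym-applications and uses the deploy/workload/cleanup methods defined below; it is illustrative, not a test shipped with this patch.

```python
# Illustrative end-to-end lifecycle for a single application (assumes a working kubeconfig).
from sregym.service.apps.helpers import get_frontend_url
from sregym.service.apps.hotel_reservation import HotelReservation

app = HotelReservation()
app.deploy()                                 # apply manifests and wait for pods to be ready
print("frontend:", get_frontend_url(app))    # ClusterIP URL used as the wrk2 target
app.create_workload(rate=50, duration=60)    # wrk2 parameters; see create_workload() below
app.start_workload()
# ... observe, inject faults, collect traces ...
app.cleanup()                                # delete namespace, leftover PVs, and workload job
```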
+ + def load_app_json(self): + super().load_app_json() + metadata = self.get_app_json() + self.app_name = metadata["Name"] + self.description = metadata["Desc"] + self.frontend_service = metadata.get("frontend_service", "frontend") + self.frontend_port = metadata.get("frontend_port", 5000) + + def create_configmaps(self): + """Create configmaps for the hotel reservation application.""" + self.kubectl.create_or_update_configmap( + name="mongo-rate-script", + namespace=self.namespace, + data=self._prepare_configmap_data(["k8s-rate-mongo.sh"]), + ) + + self.kubectl.create_or_update_configmap( + name="mongo-geo-script", + namespace=self.namespace, + data=self._prepare_configmap_data(["k8s-geo-mongo.sh"]), + ) + + script_files = [ + "revoke-admin-rate-mongo.sh", + "revoke-mitigate-admin-rate-mongo.sh", + "remove-admin-mongo.sh", + "remove-mitigate-admin-rate-mongo.sh", + ] + + self.kubectl.create_or_update_configmap( + name="failure-admin-rate", + namespace=self.namespace, + data=self._prepare_configmap_data(script_files), + ) + + script_files = [ + "revoke-admin-geo-mongo.sh", + "revoke-mitigate-admin-geo-mongo.sh", + "remove-admin-mongo.sh", + "remove-mitigate-admin-geo-mongo.sh", + ] + + self.kubectl.create_or_update_configmap( + name="failure-admin-geo", + namespace=self.namespace, + data=self._prepare_configmap_data(script_files), + ) + + def deploy(self): + """Deploy the Kubernetes configurations.""" + self.local_logger.info(f"Deploying Kubernetes configurations in namespace: {self.namespace}") + self.create_namespace() + self.create_configmaps() + self.kubectl.apply_configs(self.namespace, self.k8s_deploy_path) + self.kubectl.wait_for_ready(self.namespace) + self.trace_api = TraceAPI(self.namespace) + self.trace_api.start_port_forward() + + def delete(self): + """Delete the configmap.""" + self.kubectl.delete_configs(self.namespace, self.k8s_deploy_path) + + def cleanup(self): + """Delete the entire namespace for the hotel reservation application.""" + if self.trace_api: + self.trace_api.stop_port_forward() + self.kubectl.delete_namespace(self.namespace) + self.kubectl.wait_for_namespace_deletion(self.namespace) + pvs = self.kubectl.exec_command( + "kubectl get pv --no-headers | grep 'hotel-reservation' | awk '{print $1}'" + ).splitlines() + + for pv in pvs: + # Check if the PV is in a 'Terminating' state and remove the finalizers if necessary + self._remove_pv_finalizers(pv) + delete_command = f"kubectl delete pv {pv}" + delete_result = self.kubectl.exec_command(delete_command) + local_logger.info(f"Deleted PersistentVolume {pv}: {delete_result.strip()}") + time.sleep(5) + + if hasattr(self, "wrk"): + # self.wrk.stop() + self.kubectl.delete_job(label="job=workload", namespace=self.namespace) + + def _remove_pv_finalizers(self, pv_name: str): + """Remove finalizers from the PersistentVolume to prevent it from being stuck in a 'Terminating' state.""" + # Patch the PersistentVolume to remove finalizers if it is stuck + patch_command = f'kubectl patch pv {pv_name} -p \'{{"metadata":{{"finalizers":null}}}}\'' + _ = self.kubectl.exec_command(patch_command) + + # helper methods + def _prepare_configmap_data(self, script_files: list) -> dict: + data = {} + for file in script_files: + data[file] = self._read_script(f"{self.script_dir}/{file}") + return data + + def _read_script(self, file_path: str) -> str: + with open(file_path, "r") as file: + return file.read() + + def create_workload( + self, rate: int = 100, dist: str = "exp", connections: int = 100, duration: int = 30, threads: int = 3 + 
): + self.wrk = Wrk2WorkloadManager( + wrk=Wrk2( + rate=rate, + dist=dist, + connections=connections, + duration=duration, + threads=threads, + namespace=self.namespace, + ), + payload_script=self.payload_script, + url=f"{{placeholder}}", + namespace=self.namespace, + ) + + def start_workload(self): + if not hasattr(self, "wrk"): + self.create_workload() + self.wrk.url = get_frontend_url(self) + self.wrk.start() diff --git a/sregym/service/apps/social_network.py b/sregym/service/apps/social_network.py new file mode 100644 index 0000000..2335fd3 --- /dev/null +++ b/sregym/service/apps/social_network.py @@ -0,0 +1,109 @@ +"""Interface to the social network application from DeathStarBench""" + +import time + +from sregym.generators.workload.wrk2 import Wrk2, Wrk2WorkloadManager +from sregym.observer.trace_api import TraceAPI +from sregym.paths import SOCIAL_NETWORK_METADATA, TARGET_MICROSERVICES +from sregym.service.apps.base import Application +from sregym.service.apps.helpers import get_frontend_url +from sregym.service.helm import Helm +from sregym.service.kubectl import KubeCtl +import logging + +local_logger = logging.getLogger("all.sregym.social_network") +local_logger.propagate = True +local_logger.setLevel(logging.DEBUG) + +class SocialNetwork(Application): + def __init__(self): + super().__init__(SOCIAL_NETWORK_METADATA) + self.load_app_json() + self.kubectl = KubeCtl() + self.trace_api = None + self.local_tls_path = TARGET_MICROSERVICES / "socialNetwork/helm-chart/socialnetwork" + + self.payload_script = TARGET_MICROSERVICES / "socialNetwork/wrk2/scripts/social-network/mixed-workload.lua" + + def load_app_json(self): + super().load_app_json() + metadata = self.get_app_json() + self.app_name = metadata["Name"] + self.description = metadata["Desc"] + self.frontend_service = metadata.get("frontend_service", "nginx-thrift") + self.frontend_port = metadata.get("frontend_port", 8080) + + def create_tls_secret(self): + check_sec = f"kubectl get secret mongodb-tls -n {self.namespace}" + result = self.kubectl.exec_command(check_sec) + if "notfound" in result.lower(): + create_sec_command = ( + f"kubectl create secret generic mongodb-tls " + f"--from-file=tls.pem={self.local_tls_path}/tls.pem " + f"--from-file=ca.crt={self.local_tls_path}/ca.crt " + f"-n {self.namespace}" + ) + create_result = self.kubectl.exec_command(create_sec_command) + local_logger.debug(f"TLS secret created: {create_result.strip()}") + else: + local_logger.debug("TLS secret already exists. 
Skipping creation.") + + def deploy(self): + """Deploy the Helm configurations with architecture-aware image selection.""" + self.create_namespace() + self.create_tls_secret() + node_architectures = self.kubectl.get_node_architectures() + is_arm = any(arch in ["arm64", "aarch64"] for arch in node_architectures) + + if is_arm: + # Use the ARM-compatible image for media-frontend + if "extra_args" not in self.helm_configs: + self.helm_configs["extra_args"] = [] + + self.helm_configs["extra_args"].append( + "--set media-frontend.container.image=jacksonarthurclark/media-frontend" + ) + self.helm_configs["extra_args"].append("--set media-frontend.container.imageVersion=latest") + + Helm.install(**self.helm_configs) + Helm.assert_if_deployed(self.helm_configs["namespace"]) + self.trace_api = TraceAPI(self.namespace) + self.trace_api.start_port_forward() + + def delete(self): + """Delete the Helm configurations.""" + Helm.uninstall(**self.helm_configs) + + def cleanup(self): + """Delete the entire namespace for the social network application.""" + if self.trace_api: + self.trace_api.stop_port_forward() + Helm.uninstall(**self.helm_configs) + + if hasattr(self, "wrk"): + # self.wrk.stop() + self.kubectl.delete_job(label="job=workload", namespace=self.namespace) + self.kubectl.delete_namespace(self.namespace) + + def create_workload( + self, rate: int = 100, dist: str = "exp", connections: int = 3, duration: int = 10, threads: int = 3 + ): + self.wrk = Wrk2WorkloadManager( + wrk=Wrk2( + rate=rate, + dist=dist, + connections=connections, + duration=duration, + threads=threads, + namespace=self.namespace, + ), + payload_script=self.payload_script, + url=f"{{placeholder}}/wrk2-api/post/compose", + namespace=self.namespace, + ) + + def start_workload(self): + if not hasattr(self, "wrk"): + self.create_workload() + self.wrk.url = get_frontend_url(self) + "/wrk2-api/post/compose" + self.wrk.start() diff --git a/sregym/service/apps/tidb_cluster_operator.py b/sregym/service/apps/tidb_cluster_operator.py new file mode 100644 index 0000000..9d0ad73 --- /dev/null +++ b/sregym/service/apps/tidb_cluster_operator.py @@ -0,0 +1,296 @@ +import json +import os +import subprocess +import time +from pathlib import Path +from textwrap import dedent + +from sregym.observer import tidb_prometheus +from sregym.paths import BASE_DIR + + +class TiDBClusterDeployer: + def __init__(self, metadata_path): + with open(metadata_path, "r") as f: + self.metadata = json.load(f) + + self.name = self.metadata["Name"] + self.namespace_tidb_cluster = self.metadata["K8S Config"]["namespace"] + self.cluster_config_url = self.metadata["K8S Config"]["config_url"] + + self.operator_namespace = self.metadata["Helm Operator Config"]["namespace"] + self.operator_release_name = self.metadata["Helm Operator Config"]["release_name"] + self.operator_chart = self.metadata["Helm Operator Config"]["chart_path"] + self.operator_version = self.metadata["Helm Operator Config"]["version"] + self.operator_crd_url = self.metadata["Helm Operator Config"]["CRD"] + + env_path = os.environ.get("TIDB_OPERATOR_VALUES") + self.operator_values_path = "" + if env_path and Path(env_path).expanduser().exists(): + self.operator_values_path = str(Path(env_path).expanduser().resolve()) + else: + repo_root = Path(__file__).resolve().parents[3] + candidates = [ + repo_root / "SREGym-applications/FleetCast/satellite-app/values.yaml", + repo_root / "SREGym-applications/FleetCast/tidb-operator/values.yaml", + ] + for p in candidates: + if p.exists(): + 
self.operator_values_path = str(p.resolve()) + break + + self.tidb_service = self.metadata.get("TiDB Service", "basic-tidb") + self.tidb_port = int(self.metadata.get("TiDB Port", 4000)) + self.tidb_user = self.metadata.get("TiDB User", "root") + + def run_cmd(self, cmd): + print(f"Running: {cmd}") + subprocess.run(cmd, shell=True, check=True) + + def create_namespace(self, ns): + self.run_cmd(f"kubectl create ns {ns} --dry-run=client -o yaml | kubectl apply -f -") + + def install_crds(self): + print(f"Installing CRDs from {self.operator_crd_url} ...") + self.run_cmd(f"kubectl create -f {self.operator_crd_url} || kubectl replace -f {self.operator_crd_url}") + + def install_local_path_provisioner(self): + print("Installing local-path provisioner for dynamic volume provisioning...") + self.run_cmd( + "kubectl apply -f https://raw.githubusercontent.com/rancher/local-path-provisioner/master/deploy/local-path-storage.yaml" + ) + self.run_cmd( + 'kubectl patch storageclass local-path -p \'{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}\'' + ) + + def apply_prometheus(self): + ns = "observe" + prom_yml_path = BASE_DIR / "SREGym-applications/FleetCast/prometheus/prometheus.yaml" + + prom_yml_path = str(prom_yml_path.resolve()) + if not os.path.isfile(prom_yml_path): + raise FileNotFoundError(f"prometheus.yaml not found at {prom_yml_path}") + + self.run_cmd( + f"kubectl -n {ns} create configmap prometheus-config " + f"--from-file=prometheus.yml={prom_yml_path} " + f"-o yaml --dry-run=client | kubectl apply -f -" + ) + + self.run_cmd( + "kubectl -n observe port-forward svc/prometheus-server 9090:80 >/dev/null 2>&1 & " + "PF=$!; sleep 1; curl -s -X POST http://127.0.0.1:9090/-/reload >/dev/null; kill $PF || true" + ) + + print(f"[ok] Prometheus config applied from {prom_yml_path}") + + def install_operator_with_values(self): + print(f"Installing/upgrading TiDB Operator via Helm in namespace '{self.operator_namespace}'...") + self.create_namespace(self.operator_namespace) + self.run_cmd("helm repo add pingcap https://charts.pingcap.org || true") + self.run_cmd("helm repo update") + + values_arg = "" + if self.operator_values_path: + print(f"[info] Using values file: {self.operator_values_path}") + values_arg = f"-f {self.operator_values_path}" + else: + print("[warn] No values.yaml found; installing with chart defaults") + + self.run_cmd( + f"helm upgrade --install {self.operator_release_name} {self.operator_chart} " + f"--version {self.operator_version} -n {self.operator_namespace} " + f"--create-namespace {values_arg} " + ) + + def wait_for_operator_ready(self): + print("Waiting for tidb-controller-manager pod to be running...") + label = "app.kubernetes.io/component=controller-manager" + for _ in range(24): + try: + status = ( + subprocess.check_output( + f"kubectl get pods -n {self.operator_namespace} -l {label} -o jsonpath='{{.items[0].status.phase}}'", + shell=True, + ) + .decode() + .strip() + ) + if status == "Running": + print(" tidb-controller-manager pod is running.") + return + except subprocess.CalledProcessError: + pass + print("-- Pod not ready yet, retrying in 5 seconds...") + time.sleep(5) + raise RuntimeError("--------Timeout waiting for tidb-controller-manager pod") + + def deploy_tidb_cluster(self): + print(f"Creating TiDB cluster namespace '{self.namespace_tidb_cluster}'...") + self.create_namespace(self.namespace_tidb_cluster) + print(f"Deploying TiDB cluster manifest from {self.cluster_config_url}...") + self.run_cmd(f"kubectl apply -f 
{self.cluster_config_url} -n {self.namespace_tidb_cluster}") + + def run_sql(self, sql_text: str): + ns = self.namespace_tidb_cluster + svc = f"{self.tidb_service}.{ns}.svc" + port = self.tidb_port + user = self.tidb_user + + self.run_cmd(f"kubectl -n {ns} delete pod/mysql-client --ignore-not-found") + self.run_cmd( + f"kubectl -n {ns} run mysql-client --image=mysql:8 --restart=Never --command -- sleep 3600 || true" + ) + self.run_cmd(f"kubectl -n {ns} wait --for=condition=Ready pod/mysql-client --timeout=180s") + + sql = dedent(sql_text).strip() + heredoc = f"""kubectl -n {ns} exec -i mysql-client -- bash -lc "cat <<'SQL' | mysql -h {svc} -P {port} -u{user} +{sql} +SQL" +""" + self.run_cmd(heredoc) + + self.run_cmd(f"kubectl -n {ns} delete pod/mysql-client --wait=false || true") + + def init_schema_and_seed(self): + + print("Initializing schema and seeding data in satellite_sim ...") + sql = """ + CREATE DATABASE IF NOT EXISTS satellite_sim; + USE satellite_sim; + + DROP TABLE IF EXISTS telemetry; + DROP TABLE IF EXISTS contact_windows; + + CREATE TABLE contact_windows ( + id BIGINT PRIMARY KEY AUTO_INCREMENT, + satellite_id VARCHAR(20), + ground_station_id VARCHAR(20), + start_time DATETIME, + end_time DATETIME, + timestamp DATETIME, + distance FLOAT, + datavolume INT, + priority INT, + assigned BOOLEAN DEFAULT FALSE, + KEY idx_active (assigned, end_time), + KEY idx_gs_sat (ground_station_id, satellite_id) + ); + + CREATE TABLE telemetry ( + id BIGINT PRIMARY KEY AUTO_INCREMENT, + satellite_id VARCHAR(20), + ground_station_id VARCHAR(20), + timestamp DATETIME, + battery_level FLOAT, + temperature FLOAT, + position_lat FLOAT, + position_lon FLOAT, + status VARCHAR(20), + KEY idx_sat_time (satellite_id, timestamp), + KEY idx_gs_time (ground_station_id, timestamp) + ); + """ + self.run_sql(sql) + + def wait_for_pods_ready(self, selector: str, poll: float = 1.0): + """ + Poll every `poll` seconds until ALL pods matching `selector` are Ready. + Runs indefinitely until condition is met. + """ + ns = self.namespace_tidb_cluster + while True: + try: + out = subprocess.check_output( + f"kubectl -n {ns} get pods -l '{selector}' " + "-o jsonpath='{range .items[*]}{.metadata.name} " + '{range .status.containerStatuses[*]}{.ready} {end}{"\\n"}{end}\'', + shell=True, + ).decode() + + lines = [ln.strip() for ln in out.splitlines() if ln.strip()] + if lines: + all_ready = True + for ln in lines: + parts = ln.split() + if not parts or not all(p.lower() == "true" for p in parts[1:]): + all_ready = False + break + if all_ready: + print(f"[ok] All pods with selector '{selector}' are Ready.") + return + except subprocess.CalledProcessError: + pass + time.sleep(poll) + + def wait_for_basic_workloads(self): + """ + Wait (no timeout) for PD, TiKV, and TiDB pods to be Ready, + then wait for the TiDB Service to exist and have endpoints. 
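+        Endpoint readiness matters here because the Service object can exist
+        before any tidb pod backs it, and the SQL bootstrap that follows would
+        fail against an endpointless Service.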
+ """ + ns = self.namespace_tidb_cluster + cluster = "basic" + + # 1) PD + self.wait_for_pods_ready(selector="app.kubernetes.io/instance=basic,app.kubernetes.io/component=pd") + + # 2) TiKV + self.wait_for_pods_ready(selector="app.kubernetes.io/instance=basic,app.kubernetes.io/component=tikv") + + # 3) TiDB + self.wait_for_pods_ready(selector="app.kubernetes.io/instance=basic,app.kubernetes.io/component=tidb") + + try: + svc_name = ( + subprocess.check_output( + f"kubectl -n {ns} get svc -l app.kubernetes.io/instance={cluster}," + "app.kubernetes.io/component=tidb " + "-o jsonpath='{.items[0].metadata.name}'", + shell=True, + ) + .decode() + .strip() + .strip("'") + ) + if not svc_name: + svc_name = self.tidb_service + except subprocess.CalledProcessError: + svc_name = self.tidb_service + + print(f"[info] Using TiDB Service: {svc_name}") + self.tidb_service = svc_name + while True: + try: + eps = ( + subprocess.check_output( + f"kubectl -n {ns} get endpoints {svc_name} " + "-o jsonpath='{range .subsets[*].addresses[*]}{.ip}{\"\\n\"}{end}'", + shell=True, + ) + .decode() + .strip() + .strip("'") + ) + if eps: + print(f"[ok] Service {svc_name} has endpoints:\n{eps}") + return + except subprocess.CalledProcessError: + pass + time.sleep(1.0) + + def deploy_all(self): + print(f"----------Starting deployment: {self.name}") + self.create_namespace(self.namespace_tidb_cluster) + self.install_local_path_provisioner() + self.install_crds() + self.install_operator_with_values() + self.wait_for_operator_ready() + self.deploy_tidb_cluster() + self.wait_for_basic_workloads() + self.init_schema_and_seed() + print("-------------TiDB cluster deployment complete.") + + +if __name__ == "__main__": + deployer = TiDBClusterDeployer("../metadata/tidb_metadata.json") + deployer.deploy_all() diff --git a/sregym/service/apps/train_ticket.py b/sregym/service/apps/train_ticket.py new file mode 100644 index 0000000..280651b --- /dev/null +++ b/sregym/service/apps/train_ticket.py @@ -0,0 +1,163 @@ +"""Interface to the Train Ticket application""" + +import os +import tempfile +import time +from pathlib import Path + +from sregym.generators.workload.locust import LocustWorkloadManager +from sregym.paths import TARGET_MICROSERVICES, TRAIN_TICKET_METADATA +from sregym.service.apps.base import Application +from sregym.service.helm import Helm +from sregym.service.kubectl import KubeCtl + + +class TrainTicket(Application): + def __init__(self): + super().__init__(str(TRAIN_TICKET_METADATA)) + self.load_app_json() + self.kubectl = KubeCtl() + self.workload_manager = None + self.create_namespace() + + def load_app_json(self): + super().load_app_json() + metadata = self.get_app_json() + self.app_name = metadata["Name"] + self.description = metadata["Desc"] + self.frontend_service = None + self.frontend_port = None + + def deploy(self): + if self._is_train_ticket_deployed(): + print( + f"[TrainTicket] Skipping deployment: train-ticket app is already deployed in namespace {self.namespace}" + ) + return + + if self.namespace: + self.kubectl.create_namespace_if_not_exist(self.namespace) + + Helm.install(**self.helm_configs) + self.kubectl.wait_for_job_completion(job_name="train-ticket-deploy", namespace="train-ticket", timeout=1200) + + self._deploy_flagd_infrastructure() + self._deploy_load_generator() + + def delete(self): + """Delete the Helm configurations.""" + # Helm.uninstall(**self.helm_configs) # Don't helm uninstall until cleanup job is fixed on train-ticket + if self._is_train_ticket_deployed(): + print( + 
f"[TrainTicket] Skipping deletion: train-ticket app is currently deployed in namespace {self.namespace}" + ) + return + + if self.namespace: + self.kubectl.delete_namespace(self.namespace) + self.kubectl.wait_for_namespace_deletion(self.namespace) + + def _is_train_ticket_deployed(self): + """Check if the train-ticket app is currently deployed.""" + try: + + # Check if the namespace exists + namespace_exists = self.kubectl.exec_command(f"kubectl get namespace {self.namespace}") + if "not found" in namespace_exists or "No resources found" in namespace_exists: + return False + + # Check if train-ticket deployment exists + deployment_exists = self.kubectl.exec_command(f"kubectl get deployment -n {self.namespace}") + if "No resources found" in deployment_exists or not deployment_exists.strip(): + return False + + return True + except Exception as e: + print(f"[TrainTicket] Warning: Failed to check deployment status: {e}") + return False + + def cleanup(self): + """Cleanup the train-ticket application if it's not currently deployed.""" + if self._is_train_ticket_deployed(): + print( + f"[TrainTicket] Skipping cleanup: train-ticket app is currently deployed in namespace {self.namespace}" + ) + return + + # Helm.uninstall(**self.helm_configs) + if self.namespace: + self.kubectl.delete_namespace(self.namespace) + + def create_workload(self): + """Create workload manager for log collection (like astronomy shop).""" + self.wrk = LocustWorkloadManager( + namespace=self.namespace, + locust_url="load-generator:8089", + ) + + def start_workload(self): + """Start workload log collection (like astronomy shop).""" + if not hasattr(self, "wrk"): + self.create_workload() + self.wrk.start() + print("[TrainTicket] Workload log collection started") + + def stop_workload(self): + if hasattr(self, "wrk"): + self.wrk.stop() + print("[TrainTicket] Workload log collection stopped") + + def _deploy_flagd_infrastructure(self): + try: + flagd_templates_path = TARGET_MICROSERVICES / "train-ticket" / "templates" + + if (flagd_templates_path / "flagd-deployment.yaml").exists(): + result = self.kubectl.exec_command(f"kubectl apply -f {flagd_templates_path / 'flagd-deployment.yaml'}") + print(f"[TrainTicket] Deployed flagd service: {result}") + + if (flagd_templates_path / "flagd-config.yaml").exists(): + result = self.kubectl.exec_command(f"kubectl apply -f {flagd_templates_path / 'flagd-config.yaml'}") + print(f"[TrainTicket] Deployed flagd ConfigMap: {result}") + + print(f"[TrainTicket] flagd infrastructure deployed successfully") + + except Exception as e: + print(f"[TrainTicket] Warning: Failed to deploy flagd infrastructure: {e}") + + def _deploy_load_generator(self): + try: + + locustfile_path = Path(__file__).parent.parent.parent / "resources" / "trainticket" / "locustfile.py" + + if locustfile_path.exists(): + result = self.kubectl.exec_command( + f"kubectl create configmap locustfile-config --from-file=locustfile.py={locustfile_path} -n {self.namespace} --dry-run=client -o yaml | kubectl apply -f -" + ) + print(f"[TrainTicket] Created ConfigMap from file: {result}") + + deployment_path = ( + Path(__file__).parent.parent.parent / "resources" / "trainticket" / "locust-deployment.yaml" + ) + + if deployment_path.exists(): + with open(deployment_path, "r") as f: + content = f.read() + + with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as tmp: + tmp.write(content) + temp_path = tmp.name + + result = self.kubectl.exec_command(f"kubectl apply -f {temp_path}") + os.unlink(temp_path) + 
print(f"[TrainTicket] Deployed load generator: {result}") + + print("[TrainTicket] Load generator deployed with auto-start") + + except Exception as e: + print(f"[TrainTicket] Warning: Failed to deploy load generator: {e}") + + +# if __name__ == "__main__": +# app = TrainTicket() +# app.deploy() +# app.delete() diff --git a/sregym/service/dm_dust_manager.py b/sregym/service/dm_dust_manager.py new file mode 100644 index 0000000..2bf83d7 --- /dev/null +++ b/sregym/service/dm_dust_manager.py @@ -0,0 +1,242 @@ +import json +import shlex +import subprocess +import time +from typing import Dict, List, Optional + +from sregym.service.kubectl import KubeCtl + +# Constants +DEFAULT_KHAOS_NS = "khaos" +DEFAULT_KHAOS_LABEL = "app=khaos" +DM_DUST_DEVICE_NAME = "openebs_dust" +DM_DUST_BACKING_FILE = "/var/tmp/openebs_dm_dust.img" +DM_DUST_BACKING_FILE_SIZE_GB = 5 +OPENEBS_LOCAL_PATH = "/var/openebs/local" +DEFAULT_BLOCK_SIZE = 512 +SETUP_TIMEOUT_SECONDS = 120 + + +class DmDustManager: + """ + Manages dm-dust infrastructure setup for fault injection. + + This class sets up dm-dust devices to intercept all OpenEBS local storage, + allowing any application using OpenEBS to have fault injection capabilities + without needing to know specific service names or PVC details. + + The setup process: + 1. Creates a large dm-dust device + 2. Mounts it at /var/openebs/local + 3. All PVs created by OpenEBS will automatically use this dm-dust device + """ + + def __init__( + self, + kubectl: KubeCtl, + khaos_ns: str = DEFAULT_KHAOS_NS, + khaos_label: str = DEFAULT_KHAOS_LABEL, + ): + self.kubectl = kubectl + self.khaos_ns = khaos_ns + self.khaos_label = khaos_label + self._pod_cache: Dict[str, str] = {} # Cache pod names by node + + def setup_openebs_dm_dust_infrastructure(self, nodes: Optional[List[str]] = None) -> None: + """ + Set up dm-dust to intercept all OpenEBS local storage on the specified nodes. + Creates a dm-dust device that will be used for all PVs created in /var/openebs/local/. + + Args: + nodes: List of node names to set up. If None, sets up on all nodes in the cluster. 
+ """ + if nodes is None: + nodes_response = self.kubectl.list_nodes() + nodes = [node.metadata.name for node in nodes_response.items] + + if not nodes: + raise RuntimeError("No nodes available for dm-dust setup") + + for node in nodes: + try: + self._setup_dm_dust_on_node(node) + print(f"[dm-dust] ✅ Set up dm-dust infrastructure on {node}") + except Exception as e: + print(f"[dm-dust] ❌ Failed to set up dm-dust on {node}: {e}") + raise + + def _setup_dm_dust_on_node(self, node: str) -> None: + """Set up dm-dust device to intercept OpenEBS storage on a single node.""" + print(f"[dm-dust] Setting up dm-dust on {node}...") + + # Build the complete setup script from logical sections + script_parts = [ + self._build_module_check_script(), + self._build_cleanup_script(), + self._build_backing_file_script(), + self._build_dm_dust_create_script(), + self._build_mount_script(), + ] + + full_script = "set -e\n" + "\n".join(script_parts) + + # Execute using nsenter to access host namespace + pod = self._get_khaos_pod_on_node(node) + cmd = [ + "kubectl", + "-n", + self.khaos_ns, + "exec", + pod, + "--", + "nsenter", + "-t", + "1", + "-m", + "-u", + "-i", + "-n", + "-p", + "sh", + "-c", + full_script, + ] + + try: + rc = subprocess.run(cmd, timeout=SETUP_TIMEOUT_SECONDS, capture_output=True, text=True) + if rc.returncode != 0: + error_msg = f"Failed to setup dm-dust on {node}: return code {rc.returncode}" + if rc.stderr: + error_msg += f"\nStderr: {rc.stderr}" + if rc.stdout: + error_msg += f"\nStdout: {rc.stdout}" + raise RuntimeError(error_msg) + except subprocess.TimeoutExpired: + raise RuntimeError(f"Timeout setting up dm-dust on {node} after {SETUP_TIMEOUT_SECONDS} seconds") + + def _build_module_check_script(self) -> str: + """Build script to check and load dm_dust module.""" + return f""" +echo 'Setting up dm-dust for OpenEBS local storage...' +echo 'Checking dm_dust module...' +modprobe dm_dust || {{ echo 'Failed to load dm_dust module'; exit 1; }} +lsmod | grep dm_dust || {{ echo 'dm_dust module not found in lsmod'; exit 1; }} +echo 'Checking device-mapper targets...' +dmsetup targets | grep dust || {{ echo 'dust target not available in dmsetup'; exit 1; }} +""" + + def _build_cleanup_script(self) -> str: + """Build script to clean up existing dm-dust infrastructure.""" + openebs_path = OPENEBS_LOCAL_PATH + return f""" +DM_NAME={DM_DUST_DEVICE_NAME} +BACKING_FILE={shlex.quote(DM_DUST_BACKING_FILE)} + +echo 'Cleaning up any existing dm-dust infrastructure...' + +# Unmount if mounted +if mountpoint -q {shlex.quote(openebs_path)} 2>/dev/null; then + echo 'Unmounting {openebs_path}...' + umount {shlex.quote(openebs_path)} 2>/dev/null || umount -f {shlex.quote(openebs_path)} 2>/dev/null || true + sleep 1 +fi + +# Remove existing dm device +if dmsetup info $DM_NAME >/dev/null 2>&1; then + echo 'Found existing device $DM_NAME, attempting removal...' + mount | grep "/dev/mapper/$DM_NAME" | awk '{{print $3}}' | xargs -r -I {{}} umount -l {{}} 2>/dev/null || true + sleep 1 + if dmsetup remove $DM_NAME 2>/dev/null; then + echo 'Device removed successfully' + elif dmsetup remove --force $DM_NAME 2>/dev/null; then + echo 'Device removed with --force' + else + echo 'Device is busy, renaming and marking for deferred removal...' 
+ timestamp=$(date +%s) + dmsetup rename $DM_NAME ${{DM_NAME}}_old_${{timestamp}} 2>/dev/null || true + dmsetup remove --deferred ${{DM_NAME}}_old_${{timestamp}} 2>/dev/null || true + echo 'Old device will be cleaned up automatically when kernel releases it' + fi +fi + +# Clean up backing file and loop devices +if [ -f $BACKING_FILE ]; then + echo 'Cleaning up old backing file and loop devices...' + losetup -j $BACKING_FILE 2>/dev/null | awk -F: '{{print $1}}' | xargs -r losetup -d 2>/dev/null || true + rm -f $BACKING_FILE +fi +""" + + def _build_backing_file_script(self) -> str: + """Build script to create backing file and loop device.""" + openebs_path = OPENEBS_LOCAL_PATH + return f""" +BACKING_FILE={shlex.quote(DM_DUST_BACKING_FILE)} + +echo 'Preparing OpenEBS directory at {openebs_path}...' +rm -rf {shlex.quote(openebs_path)}/* 2>/dev/null || true +mkdir -p {shlex.quote(openebs_path)} + +echo 'Creating {DM_DUST_BACKING_FILE_SIZE_GB}GB backing file for OpenEBS dm-dust...' +dd if=/dev/zero of=$BACKING_FILE bs=1M count={DM_DUST_BACKING_FILE_SIZE_GB * 1024} + +echo 'Setting up loop device...' +LOOP_DEV=$(losetup -f --show $BACKING_FILE) +echo "Loop device: $LOOP_DEV" +""" + + def _build_dm_dust_create_script(self) -> str: + """Build script to create and format dm-dust device.""" + return f""" +DM_NAME={DM_DUST_DEVICE_NAME} +SECTORS=$(blockdev --getsz $LOOP_DEV) +echo "Sectors: $SECTORS" + +echo 'Creating healthy dm-dust device for OpenEBS...' +echo 'Running dmsetup create command...' +dmsetup create $DM_NAME --table "0 $SECTORS dust $LOOP_DEV 0 {DEFAULT_BLOCK_SIZE}" || {{ + echo 'dmsetup create failed' + dmsetup targets + exit 1 +}} + +echo 'dmsetup create completed successfully' +echo 'Verifying dm device was created...' +ls -la /dev/mapper/$DM_NAME || {{ echo 'dm device not found'; exit 1; }} + +echo 'Formatting dm-dust device with ext4...' +mkfs.ext4 -F /dev/mapper/$DM_NAME || {{ echo 'mkfs.ext4 failed'; exit 1; }} +""" + + def _build_mount_script(self) -> str: + """Build script to mount dm-dust device and set permissions.""" + openebs_path = OPENEBS_LOCAL_PATH + return f""" +DM_NAME={DM_DUST_DEVICE_NAME} +echo 'Mounting dm-dust device at {openebs_path}...' +mount /dev/mapper/$DM_NAME {shlex.quote(openebs_path)} + +echo 'Setting proper permissions...' 
+chmod 755 {shlex.quote(openebs_path)} + +echo 'OpenEBS dm-dust infrastructure ready - all PVs will use dm-dust' +""" + + def _get_khaos_pod_on_node(self, node: str) -> str: + """Find a running Khaos pod on the specified node, with caching.""" + if node in self._pod_cache: + return self._pod_cache[node] + + cmd = f"kubectl -n {shlex.quote(self.khaos_ns)} get pods -l {shlex.quote(self.khaos_label)} -o json" + out = self.kubectl.exec_command(cmd) + if not out: + raise RuntimeError(f"Failed to get pods: empty response") + + data = json.loads(out) + for item in data.get("items", []): + if item.get("spec", {}).get("nodeName") == node and item.get("status", {}).get("phase") == "Running": + pod_name = item["metadata"]["name"] + self._pod_cache[node] = pod_name + return pod_name + + raise RuntimeError(f"No running Khaos pod found on node {node}") diff --git a/sregym/service/dm_flakey_manager.py b/sregym/service/dm_flakey_manager.py new file mode 100644 index 0000000..511ef56 --- /dev/null +++ b/sregym/service/dm_flakey_manager.py @@ -0,0 +1,243 @@ +import json +import shlex +import subprocess +import time +from typing import Dict, List, Optional + +from sregym.service.kubectl import KubeCtl + +# Constants +DEFAULT_KHAOS_NS = "khaos" +DEFAULT_KHAOS_LABEL = "app=khaos" +DM_FLAKEY_DEVICE_NAME = "openebs_flakey" +DM_FLAKEY_BACKING_FILE = "/var/tmp/openebs_dm_flakey.img" +DM_FLAKEY_BACKING_FILE_SIZE_GB = 5 +OPENEBS_LOCAL_PATH = "/var/openebs/local" +DEFAULT_BLOCK_SIZE = 512 +SETUP_TIMEOUT_SECONDS = 120 + + +class DmFlakeyManager: + """ + Manages dm-flakey infrastructure setup for fault injection. + + This class sets up dm-flakey devices to intercept all OpenEBS local storage, + allowing any application using OpenEBS to have fault injection capabilities + without needing to know specific service names or PVC details. + + The setup process: + 1. Creates a large dm-flakey device + 2. Mounts it at /var/openebs/local + 3. All PVs created by OpenEBS will automatically use this dm-flakey device + """ + + def __init__( + self, + kubectl: KubeCtl, + khaos_ns: str = DEFAULT_KHAOS_NS, + khaos_label: str = DEFAULT_KHAOS_LABEL, + ): + self.kubectl = kubectl + self.khaos_ns = khaos_ns + self.khaos_label = khaos_label + self._pod_cache: Dict[str, str] = {} # Cache pod names by node + + def setup_openebs_dm_flakey_infrastructure(self, nodes: Optional[List[str]] = None) -> None: + """ + Set up dm-flakey to intercept all OpenEBS local storage on the specified nodes. + Creates a dm-flakey device that will be used for all PVs created in /var/openebs/local/. + + Args: + nodes: List of node names to set up. If None, sets up on all nodes in the cluster. 
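+
+        Note: the table created below ("flakey <dev> 0 1 0") keeps the device
+        permanently up, i.e. plain pass-through storage; injecting I/O errors is
+        assumed to be done later by reloading the table with a non-zero down interval.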
+ """ + if nodes is None: + nodes_response = self.kubectl.list_nodes() + nodes = [node.metadata.name for node in nodes_response.items] + + if not nodes: + raise RuntimeError("No nodes available for dm-flakey setup") + + for node in nodes: + try: + self._setup_dm_flakey_on_node(node) + print(f"[dm-flakey] ✅ Set up dm-flakey infrastructure on {node}") + except Exception as e: + print(f"[dm-flakey] ❌ Failed to set up dm-flakey on {node}: {e}") + raise + + def _setup_dm_flakey_on_node(self, node: str) -> None: + """Set up dm-flakey device to intercept OpenEBS storage on a single node.""" + print(f"[dm-flakey] Setting up dm-flakey on {node}...") + + # Build the complete setup script from logical sections + script_parts = [ + self._build_module_check_script(), + self._build_cleanup_script(), + self._build_backing_file_script(), + self._build_dm_flakey_create_script(), + self._build_mount_script(), + ] + + full_script = "set -e\n" + "\n".join(script_parts) + + # Execute using nsenter to access host namespace + pod = self._get_khaos_pod_on_node(node) + cmd = [ + "kubectl", + "-n", + self.khaos_ns, + "exec", + pod, + "--", + "nsenter", + "-t", + "1", + "-m", + "-u", + "-i", + "-n", + "-p", + "sh", + "-c", + full_script, + ] + + try: + rc = subprocess.run(cmd, timeout=SETUP_TIMEOUT_SECONDS, capture_output=True, text=True) + if rc.returncode != 0: + error_msg = f"Failed to setup dm-flakey on {node}: return code {rc.returncode}" + if rc.stderr: + error_msg += f"\nStderr: {rc.stderr}" + if rc.stdout: + error_msg += f"\nStdout: {rc.stdout}" + raise RuntimeError(error_msg) + except subprocess.TimeoutExpired: + raise RuntimeError(f"Timeout setting up dm-flakey on {node} after {SETUP_TIMEOUT_SECONDS} seconds") + + def _build_module_check_script(self) -> str: + """Build script to check and load dm_flakey module.""" + return f""" +echo 'Setting up dm-flakey for OpenEBS local storage...' +echo 'Checking dm_flakey module...' +modprobe dm_flakey || {{ echo 'Failed to load dm_flakey module'; exit 1; }} +lsmod | grep dm_flakey || {{ echo 'dm_flakey module not found in lsmod'; exit 1; }} +echo 'Checking device-mapper targets...' +dmsetup targets | grep flakey || {{ echo 'flakey target not available in dmsetup'; exit 1; }} +""" + + def _build_cleanup_script(self) -> str: + """Build script to clean up existing dm-flakey infrastructure.""" + openebs_path = OPENEBS_LOCAL_PATH + return f""" +DM_NAME={DM_FLAKEY_DEVICE_NAME} +BACKING_FILE={shlex.quote(DM_FLAKEY_BACKING_FILE)} + +echo 'Cleaning up any existing dm-flakey infrastructure...' + +# Unmount if mounted +if mountpoint -q {shlex.quote(openebs_path)} 2>/dev/null; then + echo 'Unmounting {openebs_path}...' + umount {shlex.quote(openebs_path)} 2>/dev/null || umount -f {shlex.quote(openebs_path)} 2>/dev/null || true + sleep 1 +fi + +# Remove existing dm device +if dmsetup info $DM_NAME >/dev/null 2>&1; then + echo 'Found existing device $DM_NAME, attempting removal...' + mount | grep "/dev/mapper/$DM_NAME" | awk '{{print $3}}' | xargs -r -I {{}} umount -l {{}} 2>/dev/null || true + sleep 1 + if dmsetup remove $DM_NAME 2>/dev/null; then + echo 'Device removed successfully' + elif dmsetup remove --force $DM_NAME 2>/dev/null; then + echo 'Device removed with --force' + else + echo 'Device is busy, renaming and marking for deferred removal...' 
+ timestamp=$(date +%s) + dmsetup rename $DM_NAME ${{DM_NAME}}_old_${{timestamp}} 2>/dev/null || true + dmsetup remove --deferred ${{DM_NAME}}_old_${{timestamp}} 2>/dev/null || true + echo 'Old device will be cleaned up automatically when kernel releases it' + fi +fi + +# Clean up backing file and loop devices +if [ -f $BACKING_FILE ]; then + echo 'Cleaning up old backing file and loop devices...' + losetup -j $BACKING_FILE 2>/dev/null | awk -F: '{{print $1}}' | xargs -r losetup -d 2>/dev/null || true + rm -f $BACKING_FILE +fi +""" + + def _build_backing_file_script(self) -> str: + """Build script to create backing file and loop device.""" + openebs_path = OPENEBS_LOCAL_PATH + return f""" +BACKING_FILE={shlex.quote(DM_FLAKEY_BACKING_FILE)} + +echo 'Preparing OpenEBS directory at {openebs_path}...' +rm -rf {shlex.quote(openebs_path)}/* 2>/dev/null || true +mkdir -p {shlex.quote(openebs_path)} + +echo 'Creating {DM_FLAKEY_BACKING_FILE_SIZE_GB}GB backing file for OpenEBS dm-flakey...' +dd if=/dev/zero of=$BACKING_FILE bs=1M count={DM_FLAKEY_BACKING_FILE_SIZE_GB * 1024} + +echo 'Setting up loop device...' +LOOP_DEV=$(losetup -f --show $BACKING_FILE) +echo "Loop device: $LOOP_DEV" +""" + + def _build_dm_flakey_create_script(self) -> str: + """Build script to create and format dm-flakey device.""" + return f""" +DM_NAME={DM_FLAKEY_DEVICE_NAME} +SECTORS=$(blockdev --getsz $LOOP_DEV) +echo "Sectors: $SECTORS" + +echo 'Creating healthy dm-flakey device for OpenEBS...' +echo 'Running dmsetup create command...' +# up=1, down=0 means always up (pass-through) +dmsetup create $DM_NAME --table "0 $SECTORS flakey $LOOP_DEV 0 1 0" || {{ + echo 'dmsetup create failed' + dmsetup targets + exit 1 +}} + +echo 'dmsetup create completed successfully' +echo 'Verifying dm device was created...' +ls -la /dev/mapper/$DM_NAME || {{ echo 'dm device not found'; exit 1; }} + +echo 'Formatting dm-flakey device with ext4...' +mkfs.ext4 -F /dev/mapper/$DM_NAME || {{ echo 'mkfs.ext4 failed'; exit 1; }} +""" + + def _build_mount_script(self) -> str: + """Build script to mount dm-flakey device and set permissions.""" + openebs_path = OPENEBS_LOCAL_PATH + return f""" +DM_NAME={DM_FLAKEY_DEVICE_NAME} +echo 'Mounting dm-flakey device at {openebs_path}...' +mount /dev/mapper/$DM_NAME {shlex.quote(openebs_path)} + +echo 'Setting proper permissions...' 
+chmod 755 {shlex.quote(openebs_path)} + +echo 'OpenEBS dm-flakey infrastructure ready - all PVs will use dm-flakey' +""" + + def _get_khaos_pod_on_node(self, node: str) -> str: + """Find a running Khaos pod on the specified node, with caching.""" + if node in self._pod_cache: + return self._pod_cache[node] + + cmd = f"kubectl -n {shlex.quote(self.khaos_ns)} get pods -l {shlex.quote(self.khaos_label)} -o json" + out = self.kubectl.exec_command(cmd) + if not out: + raise RuntimeError(f"Failed to get pods: empty response") + + data = json.loads(out) + for item in data.get("items", []): + if item.get("spec", {}).get("nodeName") == node and item.get("status", {}).get("phase") == "Running": + pod_name = item["metadata"]["name"] + self._pod_cache[node] = pod_name + return pod_name + + raise RuntimeError(f"No running Khaos pod found on node {node}") diff --git a/sregym/service/helm.py b/sregym/service/helm.py new file mode 100644 index 0000000..4ad862d --- /dev/null +++ b/sregym/service/helm.py @@ -0,0 +1,227 @@ +"""Interface for helm operations""" + +import subprocess +import time +import logging +from sregym.service.kubectl import KubeCtl + +local_logger = logging.getLogger("all.infra.helm") +local_logger.propagate = True +local_logger.setLevel(logging.DEBUG) +class Helm: + @staticmethod + def install(**args): + """Install a helm chart + + Args: + release_name (str): Name of the release + chart_path (str): Path to the helm chart + namespace (str): Namespace to install the chart + version (str): Version of the chart + extra_args (List[str)]: Extra arguments for the helm install command + remote_chart (bool): Whether the chart is remote (from a Helm repo) + """ + + release_name = args.get("release_name") + chart_path = args.get("chart_path") + namespace = args.get("namespace") + version = args.get("version") + extra_args = args.get("extra_args") + remote_chart = args.get("remote_chart", False) + + local_logger.info(f"Helm Install: {release_name} in namespace {namespace}") + + if not remote_chart: + # Install dependencies for chart before installation + dependency_command = f"helm dependency update {chart_path}" + dependency_process = subprocess.Popen( + dependency_command, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + dependency_output, dependency_error = dependency_process.communicate() + + command = f"helm install {release_name} {chart_path} -n {namespace} --create-namespace" + + if version: + command += f" --version {version}" + + if extra_args: + command += " " + " ".join(extra_args) + + process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE) + output, error = process.communicate() + + if error: + stderr = error.decode("utf-8").strip() + stdout = output.decode("utf-8").strip() + raise RuntimeError( + f"Helm install failed for release '{release_name}' in namespace '{namespace}'. " + f"Chart: {chart_path}. Error output:\n{stderr}\n" + f"Stdout (for context):\n{stdout}" + ) + else: + local_logger.debug(output.decode("utf-8")) + + @staticmethod + def uninstall(**args): + """Uninstall a helm chart + + Args: + release_name (str): Name of the release + namespace (str): Namespace to uninstall the chart + """ + release_name = args.get("release_name") + namespace = args.get("namespace") + + local_logger.info(f"Helm Uninstall: {release_name} in namespace {namespace}") + + if not Helm.exists_release(release_name, namespace): + local_logger.warning(f"Release {release_name} does not exist. 
Skipping uninstall.") + return + + command = f"helm uninstall {release_name} -n {namespace}" + process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE) + output, error = process.communicate() + + if error: + stderr = error.decode("utf-8").strip() + stdout = output.decode("utf-8").strip() + raise RuntimeError( + f"Helm uninstall failed for release '{release_name}' in namespace '{namespace}'. " + f"Release: {release_name}. Error output:\n{stderr}\n" + f"Stdout (for context):\n{stdout}" + ) + else: + local_logger.debug(output.decode("utf-8")) + + @staticmethod + def exists_release(release_name: str, namespace: str) -> bool: + """Check if a Helm release exists + + Args: + release_name (str): Name of the release + namespace (str): Namespace to check + + Returns: + bool: True if release exists + """ + command = f"helm list -n {namespace}" + process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE) + output, error = process.communicate() + + if error: + local_logger.error(error.decode("utf-8")) + return False + else: + return release_name in output.decode("utf-8") + + @staticmethod + def assert_if_deployed(namespace: str): + """Assert if all services in the application are deployed + + Args: + namespace (str): Namespace to check + + Returns: + bool: True if deployed + + Raises: + Exception: If not deployed + """ + kubectl = KubeCtl() + try: + kubectl.wait_for_ready(namespace) + except Exception as e: + raise e + + return True + + @staticmethod + def upgrade(**args): + """Upgrade a helm chart + + Args: + release_name (str): Name of the release + chart_path (str): Path to the helm chart + namespace (str): Namespace to upgrade the chart + values_file (str): Path to the values.yaml file + set_values (dict): Key-value pairs for --set options + """ + + release_name = args.get("release_name") + chart_path = args.get("chart_path") + namespace = args.get("namespace") + values_file = args.get("values_file") + set_values = args.get("set_values", {}) + + local_logger.info(f"Helm Upgrade: {release_name} in namespace {namespace}") + + command = [ + "helm", + "upgrade", + release_name, + chart_path, + "-n", + namespace, + "-f", + values_file, + ] + + # Add --set options if provided + for key, value in set_values.items(): + command.append("--set") + command.append(f"{key}={value}") + + process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + output, error = process.communicate() + + if error: + local_logger.error("Error during helm upgrade:") + stderr = error.decode("utf-8").strip() + stdout = output.decode("utf-8").strip() + raise RuntimeError( + f"Helm install failed for release '{release_name}' in namespace '{namespace}'. " + f"Chart: {chart_path}. 
Error output:\n{stderr}\n" + f"Stdout (for context):\n{stdout}" + ) + else: + local_logger.info("Helm upgrade successful!") + local_logger.debug(output.decode("utf-8")) + + @staticmethod + def add_repo(name: str, url: str): + """Add a Helm repository + + Args: + name (str): Name of the repository + url (str): URL of the repository + """ + local_logger.info(f"Helm Repo Add: {name} with url {url}") + command = f"helm repo add {name} {url}" + process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + output, error = process.communicate() + + if error: + local_logger.error(f"Error adding helm repo {name}: {error.decode('utf-8')}") + stderr = error.decode("utf-8").strip() + stdout = output.decode("utf-8").strip() + raise RuntimeError( + f"Helm upgrade failed for release '{name}' and url {url}. " + f"Error output:\n{stderr}\n" + f"Stdout (for context):\n{stdout}" + ) + else: + local_logger.info(f"Helm repo {name} added successfully: {output.decode('utf-8')}") + + +# Example usage +if __name__ == "__main__": + sn_configs = { + "release_name": "social-network", + "chart_path": "/home/oppertune/DeathStarBench/socialNetwork/helm-chart/socialnetwork", + "namespace": "social-network", + } + Helm.install(**sn_configs) + Helm.uninstall(**sn_configs) diff --git a/sregym/service/khaos.py b/sregym/service/khaos.py new file mode 100644 index 0000000..7ffab8d --- /dev/null +++ b/sregym/service/khaos.py @@ -0,0 +1,73 @@ +import json +import time + +from sregym.paths import KHAOS_DS +from sregym.service.kubectl import KubeCtl + +KHAOS_NS = "khaos" +KHAOS_DS_NAME = "khaos" + + +class KhaosController: + def __init__(self, kubectl: KubeCtl): + self.kubectl = kubectl + + def ensure_deployed(self): + if self.kubectl.is_emulated_cluster(): + raise RuntimeError("Khaos cannot be deployed on emulated clusters (kind, minikube, k3d, etc.).") + + rc = self.kubectl.exec_command(f"kubectl get ns {KHAOS_NS} >/dev/null 2>&1 || kubectl create ns {KHAOS_NS}") + + cmd = f"kubectl apply -f {KHAOS_DS}" + out = self.kubectl.exec_command(cmd) + if isinstance(out, tuple): + stdout, stderr, rc = (out + ("",))[:3] + if rc not in (0, None): + raise RuntimeError(f"kubectl apply failed (rc={rc}).\nSTDOUT:\n{stdout}\nSTDERR:\n{stderr}") + + # Wait for both DaemonSets to be ready (control-plane and worker) + # The YAML file contains two DaemonSets: khaos-control-plane and khaos-worker + self.kubectl.exec_command(f"kubectl -n {KHAOS_NS} rollout status ds/khaos-control-plane --timeout=3m") + self.kubectl.exec_command(f"kubectl -n {KHAOS_NS} rollout status ds/khaos-worker --timeout=3m") + + def teardown(self): + self.kubectl.exec_command(f"kubectl delete ns {KHAOS_NS} --ignore-not-found") + + def _khaos_pod_on_node(self, node_name: str) -> str: + deadline = time.time() + 90 + while time.time() < deadline: + out = self.kubectl.exec_command(f"kubectl -n {KHAOS_NS} get pods -o json") + if isinstance(out, tuple): + out = out[0] + data = json.loads(out or "{}") + for item in data.get("items", []): + if ( + item.get("spec", {}).get("nodeName") == node_name + and item.get("status", {}).get("phase") == "Running" + ): + return item["metadata"]["name"] + time.sleep(3) + # diagnostics + ds = self.kubectl.exec_command(f"kubectl -n {KHAOS_NS} get ds -o wide") + pods = self.kubectl.exec_command(f"kubectl -n {KHAOS_NS} get pods -o wide") + raise RuntimeError( + f"No running Khaos pod on node {node_name} after 90s.\n" + f"DaemonSets:\n{ds[0] if isinstance(ds, tuple) else ds}\n" + f"Pods:\n{pods[0] if isinstance(pods, 
tuple) else pods}" + ) + + def inject(self, node_name: str, fault_name: str, host_pid: int): + """ + Run: /khaos/khaos + inside the Khaos pod on the specified node. + """ + pod = self._khaos_pod_on_node(node_name) + cmd = f"kubectl -n {KHAOS_NS} exec {pod} -- /khaos/khaos {fault_name} {host_pid}" + out = self.kubectl.exec_command(cmd) + return out + + def recover(self, node_name: str, fault_name: str): + pod = self._khaos_pod_on_node(node_name) + cmd = f"kubectl -n {KHAOS_NS} exec {pod} -- /khaos/khaos --recover {fault_name}" + out = self.kubectl.exec_command(cmd) + return out diff --git a/sregym/service/khaos.yaml b/sregym/service/khaos.yaml new file mode 100644 index 0000000..e8af697 --- /dev/null +++ b/sregym/service/khaos.yaml @@ -0,0 +1,128 @@ +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: khaos-control-plane + namespace: khaos +spec: + selector: + matchLabels: + app: khaos + profile: control-plane + template: + metadata: + labels: + app: khaos + profile: control-plane + spec: + hostPID: true + hostNetwork: true + automountServiceAccountToken: false + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/arch + operator: In + values: ["amd64"] + - key: node-role.kubernetes.io/control-plane + operator: Exists + - matchExpressions: + - key: kubernetes.io/arch + operator: In + values: ["amd64"] + - key: node-role.kubernetes.io/master + operator: Exists + tolerations: + - key: "node-role.kubernetes.io/control-plane" + operator: "Exists" + effect: "NoSchedule" + - key: "node-role.kubernetes.io/master" + operator: "Exists" + effect: "NoSchedule" + containers: + - name: khaos + image: saad1038/khaos-x86:latest + imagePullPolicy: IfNotPresent + command: ["sleep", "infinity"] + securityContext: + privileged: true + volumeMounts: + - { name: sys-bpf, mountPath: /sys/fs/bpf } + - { name: sys-debug, mountPath: /sys/kernel/debug } + - { name: host-proc, mountPath: /host/proc, readOnly: true } + - { name: lib-modules, mountPath: /lib/modules, readOnly: true } + - { name: btf, mountPath: /sys/kernel/btf, readOnly: true } + - { name: run-dir, mountPath: /run, readOnly: true } + - { name: var-run, mountPath: /var/run, readOnly: true } + - { name: host-dev, mountPath: /dev } + volumes: + - { name: sys-bpf, hostPath: { path: /sys/fs/bpf, type: Directory } } + - { name: sys-debug, hostPath: { path: /sys/kernel/debug, type: Directory } } + - { name: host-proc, hostPath: { path: /proc, type: Directory } } + - { name: lib-modules, hostPath: { path: /lib/modules, type: Directory } } + - { name: btf, hostPath: { path: /sys/kernel/btf, type: DirectoryOrCreate } } + - { name: run-dir, hostPath: { path: /run, type: Directory } } + - { name: var-run, hostPath: { path: /var/run, type: Directory } } + - { name: host-dev, hostPath: { path: /dev, type: Directory } } +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: khaos-worker + namespace: khaos +spec: + selector: + matchLabels: + app: khaos + profile: worker + template: + metadata: + labels: + app: khaos + profile: worker + spec: + hostPID: true + hostNetwork: true + automountServiceAccountToken: false + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/arch + operator: In + values: ["amd64"] + - key: node-role.kubernetes.io/worker + operator: Exists + nodeSelector: + kubernetes.io/arch: amd64 + node-role.kubernetes.io/worker: "" # explicit worker targeting + 
containers: + - name: khaos + image: saad1038/khaos-x86:latest + imagePullPolicy: IfNotPresent + command: ["sleep", "infinity"] + securityContext: + privileged: true + volumeMounts: + - { name: sys-bpf, mountPath: /sys/fs/bpf } + - { name: sys-debug, mountPath: /sys/kernel/debug } + - { name: host-proc, mountPath: /host/proc, readOnly: true } + - { name: lib-modules, mountPath: /lib/modules, readOnly: true } + - { name: btf, mountPath: /sys/kernel/btf, readOnly: true } + - { name: run-dir, mountPath: /run, readOnly: true } + - { name: var-run, mountPath: /var/run, readOnly: true } + - { name: openebs, mountPath: /var/openebs } + - { name: host-dev, mountPath: /dev } + volumes: + - { name: sys-bpf, hostPath: { path: /sys/fs/bpf, type: Directory } } + - { name: sys-debug, hostPath: { path: /sys/kernel/debug, type: Directory } } + - { name: host-proc, hostPath: { path: /proc, type: Directory } } + - { name: lib-modules, hostPath: { path: /lib/modules, type: Directory } } + - { name: btf, hostPath: { path: /sys/kernel/btf, type: DirectoryOrCreate } } + - { name: run-dir, hostPath: { path: /run, type: Directory } } + - { name: var-run, hostPath: { path: /var/run, type: Directory } } + - { name: openebs, hostPath: { path: /var/openebs, type: Directory } } + - { name: host-dev, hostPath: { path: /dev, type: Directory } } \ No newline at end of file diff --git a/sregym/service/kubectl.py b/sregym/service/kubectl.py new file mode 100644 index 0000000..601bdd5 --- /dev/null +++ b/sregym/service/kubectl.py @@ -0,0 +1,651 @@ +"""Interface to K8S controller service.""" + +import json +import logging +import subprocess +import time + +local_logger = logging.getLogger("all.infra.kubectl") +local_logger.propagate = True +local_logger.setLevel(logging.DEBUG) + +try: + from kubernetes import client, config +except ModuleNotFoundError as e: + local_logger.error("Your Kubeconfig is missing. Please set up a cluster.") + exit(1) +from kubernetes import dynamic +from kubernetes.client import api_client +from kubernetes.client.rest import ApiException +from rich.console import Console +import dotenv +import os + +dotenv.load_dotenv(override=True) + +WAIT_FOR_POD_READY_TIMEOUT = int(os.getenv("WAIT_FOR_POD_READY_TIMEOUT", "600")) + +class KubeCtl: + def __init__(self): + """Initialize the KubeCtl object and load the Kubernetes configuration.""" + try: + config.load_kube_config() + except Exception as e: + local_logger.error("Missing kubeconfig. 
Please set up a cluster.") + exit(1) + self.core_v1_api = client.CoreV1Api() + self.apps_v1_api = client.AppsV1Api() + + def list_namespaces(self): + """Return a list of all namespaces in the cluster.""" + return self.core_v1_api.list_namespace() + + def list_pods(self, namespace): + """Return a list of all pods within a specified namespace.""" + return self.core_v1_api.list_namespaced_pod(namespace) + + def list_services(self, namespace): + """Return a list of all services within a specified namespace.""" + return self.core_v1_api.list_namespaced_service(namespace) + + def list_nodes(self): + """Return a list of all running nodes.""" + return self.core_v1_api.list_node() + + def get_concise_deployments_info(self, namespace=None): + """Return a concise info of a deployment.""" + cmd = f"kubectl get deployment {f'-n {namespace}' if namespace else ''} -o wide" + result = self.exec_command(cmd) + return result + + def get_concise_pods_info(self, namespace=None): + """Return a concise info of a pod.""" + cmd = f"kubectl get pod {f'-n {namespace}' if namespace else ''} -o wide" + result = self.exec_command(cmd) + return result + + def list_deployments(self, namespace): + """Return a list of all deployments within a specified namespace.""" + return self.apps_v1_api.list_namespaced_deployment(namespace) + + def get_cluster_ip(self, service_name, namespace): + """Retrieve the cluster IP address of a specified service within a namespace.""" + service_info = self.core_v1_api.read_namespaced_service(service_name, namespace) + return service_info.spec.cluster_ip # type: ignore + + def get_container_runtime(self): + """ + Retrieve the container runtime used by the cluster. + If the cluster uses multiple container runtimes, the first one found will be returned. + """ + for node in self.core_v1_api.list_node().items: + for status in node.status.conditions: + if status.type == "Ready" and status.status == "True": + return node.status.node_info.container_runtime_version + + def get_pod_name(self, namespace, label_selector): + """Get the name of the first pod in a namespace that matches a given label selector.""" + pod_info = self.core_v1_api.list_namespaced_pod(namespace, label_selector=label_selector) + return pod_info.items[0].metadata.name + + def get_pod_logs(self, pod_name, namespace): + """Retrieve the logs of a specified pod within a namespace.""" + return self.core_v1_api.read_namespaced_pod_log(pod_name, namespace) + + def get_service_json(self, service_name, namespace, deserialize=True): + """Retrieve the JSON description of a specified service within a namespace.""" + command = f"kubectl get service {service_name} -n {namespace} -o json" + result = self.exec_command(command) + + return json.loads(result) if deserialize else result + + def get_deployment(self, name: str, namespace: str): + """Fetch the deployment configuration.""" + return self.apps_v1_api.read_namespaced_deployment(name, namespace) + + def get_namespace_deployment_status(self, namespace: str): + """Return the deployment status of an app within a namespace.""" + try: + deployed_services = self.apps_v1_api.list_namespaced_deployment(namespace) + return len(deployed_services.items) > 0 + except ApiException as e: + if e.status == 404: + local_logger.warning(f"Namespace {namespace} doesn't exist.") + return False + else: + raise e + + def get_service_deployment_status(self, service: str, namespace: str): + """Return the deployment status of a single service within a namespace.""" + try: + self.get_deployment(service, namespace) + 
return True + except ApiException as e: + if e.status == 404: + return False + else: + raise e + + def get_service(self, name: str, namespace: str): + """Fetch the service configuration.""" + return client.CoreV1Api().read_namespaced_service(name=name, namespace=namespace) + + def wait_for_ready(self, namespace, sleep=2, max_wait=WAIT_FOR_POD_READY_TIMEOUT): + """Wait for all pods in a namespace to be in a Ready state before proceeding.""" + + console = Console() + console.log(f"[bold yellow]Waiting for all pods in namespace '{namespace}' to be ready...") + + with console.status("[bold green]Waiting for pods to be ready...") as status: + wait = 0 + + while wait < max_wait: + try: + pod_list = self.list_pods(namespace) + + if pod_list.items: + ready_pods = [ + pod + for pod in pod_list.items + if pod.status.container_statuses and all(cs.ready for cs in pod.status.container_statuses) + ] + + if len(ready_pods) == len(pod_list.items): + console.log(f"[bold green]All pods in namespace '{namespace}' are ready.") + return + + except Exception as e: + console.log(f"[red]Error checking pod statuses: {e}") + + time.sleep(sleep) + wait += sleep + + raise Exception( + f"[red]Timeout: Not all pods in namespace '{namespace}' reached the Ready state within {max_wait} seconds." + ) + + def wait_for_namespace_deletion(self, namespace, sleep=2, max_wait=300): + """Wait for a namespace to be fully deleted before proceeding.""" + + console = Console() + console.log("[bold yellow]Waiting for namespace deletion...") + + wait = 0 + + while wait < max_wait: + try: + self.core_v1_api.read_namespace(name=namespace) + except Exception as e: + console.log(f"[bold green]Namespace '{namespace}' has been deleted.") + return + + time.sleep(sleep) + wait += sleep + + raise Exception(f"[red]Timeout: Namespace '{namespace}' was not deleted within {max_wait} seconds.") + + def is_ready(self, pod): + phase = pod.status.phase or "" + container_statuses = pod.status.container_statuses or [] + conditions = pod.status.conditions or [] + + if phase in ["Succeeded", "Failed"]: + return True + + if phase == "Running": + if all(cs.ready for cs in container_statuses): + return True + + for cs in container_statuses: + if cs.state and cs.state.waiting: + reason = cs.state.waiting.reason + if reason == "CrashLoopBackOff": + return True + + if phase == "Pending": + for cond in conditions: + if cond.type == "PodScheduled" and cond.status == "False": + return True + + return False + + def wait_for_stable(self, namespace: str, sleep: int = 2, max_wait: int = 300): + console = Console() + console.log(f"[bold yellow]Waiting for namespace '{namespace}' to be stable...") + + with console.status("[bold yellow]Waiting for pods to be stable...") as status: + wait = 0 + + while wait < max_wait: + try: + pod_list = self.list_pods(namespace) + + if pod_list.items: + + if all(self.is_ready(pod) for pod in pod_list.items): + console.log(f"[bold green]All pods in namespace '{namespace}' are stable.") + return + except Exception as e: + console.log(f"[red]Error checking pod statuses: {e}") + + time.sleep(sleep) + wait += sleep + + raise Exception(f"[red]Timeout: Namespace '{namespace}' was not deleted within {max_wait} seconds.") + + def delete_job(self, job_name: str = None, label: str = None, namespace: str = "default"): + """Delete a Kubernetes Job.""" + console = Console() + api_instance = client.BatchV1Api() + try: + if job_name: + api_instance.delete_namespaced_job( + name=job_name, namespace=namespace, 
body=client.V1DeleteOptions(propagation_policy="Foreground") + ) + console.log(f"[bold green]Job '{job_name}' deleted successfully.") + elif label: + # If label is provided, delete jobs by label + jobs = api_instance.list_namespaced_job(namespace=namespace, label_selector=label) + if jobs.items: + for job in jobs.items: + api_instance.delete_namespaced_job( + name=job.metadata.name, + namespace=namespace, + body=client.V1DeleteOptions(propagation_policy="Foreground"), + ) + console.log(f"[bold green]Job with label '{label}' deleted successfully.") + else: + console.log(f"[yellow]No jobs found with label '{label}' in namespace '{namespace}'.") + return True + except client.exceptions.ApiException as e: + if e.status == 404: + console.log(f"[yellow]Job '{job_name}' not found in namespace '{namespace}' (already deleted)") + return True + else: + console.log(f"[red]Error deleting job '{job_name}': {e}") + return False + except Exception as e: + console.log(f"[red]Unexpected error deleting job '{job_name}': {e}") + return False + + def wait_for_job_completion(self, job_name: str, namespace: str = "default", timeout: int = 600): + """Wait for a Kubernetes Job to complete successfully within a specified timeout.""" + api_instance = client.BatchV1Api() + console = Console() + start_time = time.time() + + console.log(f"[yellow]Waiting for job '{job_name}' to complete...") + with console.status("[bold green]Waiting for job to be done...") as status: + while time.time() - start_time < timeout: + try: + job = api_instance.read_namespaced_job(name=job_name, namespace=namespace) + + # Check job status conditions first (more reliable) + if job.status.conditions: + for condition in job.status.conditions: + if condition.type == "Complete" and condition.status == "True": + console.log(f"[bold green]Job '{job_name}' completed successfully!") + return + elif condition.type == "Failed" and condition.status == "True": + error_msg = f"Job '{job_name}' failed." + if condition.reason: + error_msg += f"\nReason: {condition.reason}" + if condition.message: + error_msg += f"\nMessage: {condition.message}" + console.log(f"[bold red]{error_msg}") + raise Exception(error_msg) + + # Check numeric status as fallback + succeeded = job.status.succeeded or 0 + failed = job.status.failed or 0 + + if succeeded > 0: + console.log(f"[bold green]Job '{job_name}' completed successfully! (succeeded: {succeeded})") + return + elif failed > 0: + console.log(f"[bold red]Job '{job_name}' failed! 
(failed: {failed})") + raise Exception(f"Job '{job_name}' failed.") + + time.sleep(2) + + except client.exceptions.ApiException as e: + if e.status == 404: + console.log(f"[red]Job '{job_name}' not found!") + raise Exception(f"Job '{job_name}' not found in namespace '{namespace}'") from e + else: + console.log(f"[red]Error checking job status: {e}") + raise + + console.log(f"[bold red]Timeout waiting for job '{job_name}' to complete!") + raise TimeoutError(f"Timeout: Job '{job_name}' did not complete within {timeout} seconds.") + + def update_deployment(self, name: str, namespace: str, deployment): + """Update the deployment configuration.""" + return self.apps_v1_api.replace_namespaced_deployment(name, namespace, deployment) + + def patch_deployment(self, name: str, namespace: str, patch_body: dict): + return self.apps_v1_api.patch_namespaced_deployment(name=name, namespace=namespace, body=patch_body) + + def patch_service(self, name, namespace, body): + """Patch a Kubernetes service in a specified namespace.""" + try: + api_response = self.core_v1_api.patch_namespaced_service(name, namespace, body) + return api_response + except ApiException as e: + local_logger.error(f"Exception when patching service: {e}\n") + return None + + def patch_custom_object(self, group, version, namespace, plural, name, body): + """Patch a custom Kubernetes object (e.g., Chaos Mesh CRD).""" + return self.custom_api.patch_namespaced_custom_object( + group=group, version=version, namespace=namespace, plural=plural, name=name, body=body + ) + + def create_configmap(self, name, namespace, data): + """Create or update a configmap from a dictionary of data.""" + try: + api_response = self.update_configmap(name, namespace, data) + return api_response + except ApiException as e: + if e.status == 404: + return self.create_new_configmap(name, namespace, data) + else: + local_logger.error(f"Exception when updating configmap: {e}\n") + local_logger.error(f"Exception status code: {e.status}\n") + return None + + def create_new_configmap(self, name, namespace, data): + """Create a new configmap.""" + config_map = client.V1ConfigMap( + api_version="v1", + kind="ConfigMap", + metadata=client.V1ObjectMeta(name=name), + data=data, + ) + try: + return self.core_v1_api.create_namespaced_config_map(namespace, config_map) + except ApiException as e: + local_logger.error(f"Exception when creating configmap: {e}\n") + return None + + def create_or_update_configmap(self, name: str, namespace: str, data: dict): + """Create a configmap if it doesn't exist, or update it if it does.""" + try: + existing_configmap = self.core_v1_api.read_namespaced_config_map(name, namespace) + # ConfigMap exists, update it + existing_configmap.data = data + self.core_v1_api.replace_namespaced_config_map(name, namespace, existing_configmap) + local_logger.info(f"ConfigMap '{name}' updated in namespace '{namespace}'") + except ApiException as e: + if e.status == 404: + # ConfigMap doesn't exist, create it + body = client.V1ConfigMap(metadata=client.V1ObjectMeta(name=name), data=data) + self.core_v1_api.create_namespaced_config_map(namespace, body) + local_logger.info(f"ConfigMap '{name}' created in namespace '{namespace}'") + else: + local_logger.error(f"Error creating/updating ConfigMap '{name}': {e}") + + def update_configmap(self, name, namespace, data): + """Update existing configmap with the provided data.""" + config_map = client.V1ConfigMap( + api_version="v1", + kind="ConfigMap", + metadata=client.V1ObjectMeta(name=name), + data=data, + ) + try: + 
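+            # replace_namespaced_config_map overwrites the entire ConfigMap and raises a 404 ApiException if it does not exist yet;
+            # create_configmap (above) catches that case and falls back to creating a new ConfigMap.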
return self.core_v1_api.replace_namespaced_config_map(name, namespace, config_map) + except ApiException as e: + local_logger.error(f"Exception when updating configmap: {e}\n") + return + + def apply_configs(self, namespace: str, config_path: str): + """Apply Kubernetes configurations from a specified path to a namespace.""" + command = f"kubectl apply -Rf {config_path} -n {namespace}" + self.exec_command(command) + + def delete_configs(self, namespace: str, config_path: str): + """Delete Kubernetes configurations from a specified path in a namespace.""" + try: + exists_resource = self.exec_command(f"kubectl get all -n {namespace} -o name") + if exists_resource: + local_logger.info(f"Deleting K8S configs in namespace: {namespace}") + command = f"kubectl delete -Rf {config_path} -n {namespace} --timeout=10s" + self.exec_command(command) + else: + local_logger.warning(f"No resources found in: {namespace}. Skipping deletion.") + except subprocess.CalledProcessError as e: + local_logger.error(f"Error deleting K8S configs: {e}") + local_logger.error(f"Command output: {e.output}") + + def delete_namespace(self, namespace: str): + """Delete a specified namespace.""" + try: + self.core_v1_api.delete_namespace(name=namespace) + self.wait_for_namespace_deletion(namespace) + local_logger.info(f"Namespace '{namespace}' deleted successfully.") + except ApiException as e: + if e.status == 404: + local_logger.warning(f"Namespace '{namespace}' not found.") + else: + local_logger.error(f"Error deleting namespace '{namespace}': {e}") + + def create_namespace_if_not_exist(self, namespace: str): + """Create a namespace if it doesn't exist.""" + try: + self.core_v1_api.read_namespace(name=namespace) + local_logger.info(f"Namespace '{namespace}' already exists when you want to create.") + except ApiException as e: + if e.status == 404: + local_logger.info(f"Namespace '{namespace}' not found. 
Creating namespace.") + body = client.V1Namespace(metadata=client.V1ObjectMeta(name=namespace)) + self.core_v1_api.create_namespace(body=body) + local_logger.info(f"Namespace '{namespace}' created successfully.") + else: + local_logger.error(f"Error checking/creating namespace '{namespace}': {e}") + + def exec_command(self, command: str, input_data=None): + """Execute an arbitrary kubectl command.""" + if input_data is not None: + input_data = input_data.encode("utf-8") + try: + out = subprocess.run(command, shell=True, check=True, capture_output=True, input=input_data) + return out.stdout.decode("utf-8") + except subprocess.CalledProcessError as e: + return e.stderr.decode("utf-8") + + # if out.stderr: + # return out.stderr.decode("utf-8") + # else: + # return out.stdout.decode("utf-8") + + def get_node_architectures(self): + """Return a set of CPU architectures from all nodes in the cluster.""" + architectures = set() + try: + nodes = self.core_v1_api.list_node() + for node in nodes.items: + arch = node.status.node_info.architecture + architectures.add(arch) + except ApiException as e: + local_logger.error(f"Exception when retrieving node architectures: {e}\n") + return architectures + + def get_node_memory_capacity(self): + max_capacity = 0 + try: + nodes = self.core_v1_api.list_node() + for node in nodes.items: + capacity = node.status.capacity.get("memory") + capacity = self.parse_k8s_quantity(capacity) if capacity else 0 + max_capacity = max(max_capacity, capacity) + return max_capacity + except ApiException as e: + local_logger.error(f"Exception when retrieving node memory capacity: {e}\n") + return {} + + def parse_k8s_quantity(self, mem_str): + mem_str = mem_str.strip() + unit_multipliers = { + "Ki": 1, + "Mi": 1024**1, + "Gi": 1024**2, + "Ti": 1024**3, + "Pi": 1024**4, + "Ei": 1024**5, + "K": 1, + "M": 1000**1, + "G": 1000**2, + "T": 1000**3, + "P": 1000**4, + "E": 1000**5, + } + + import re + + match = re.match(r"^([0-9.]+)([a-zA-Z]+)?$", mem_str) + if not match: + raise ValueError(f"Invalid Kubernetes quantity: {mem_str}") + + number, unit = match.groups() + number = float(number) + multiplier = unit_multipliers.get(unit, 1) # default to 1 if no unit + return int(number * multiplier) + + def format_k8s_memory(self, bytes_value): + units = ["Ki", "Mi", "Gi", "Ti", "Pi", "Ei"] + value = bytes_value + for unit in units: + if value < 1024: + return f"{round(value, 2)}{unit}" + value /= 1024 + return f"{round(value, 2)}Ei" + + def is_emulated_cluster(self) -> bool: + try: + nodes = self.core_v1_api.list_node() + for node in nodes.items: + provider_id = (node.spec.provider_id or "").lower() + runtime = node.status.node_info.container_runtime_version.lower() + kubelet = node.status.node_info.kubelet_version.lower() + node_name = node.metadata.name.lower() + + if any(keyword in provider_id for keyword in ["kind", "k3d", "minikube"]): + return True + if any(keyword in runtime for keyword in ["containerd://", "docker://"]) and "kind" in node_name: + return True + if "minikube" in node_name or "k3d" in node_name: + return True + if "kind" in kubelet: + return True + + return False + except Exception as e: + local_logger.error(f"Error detecting cluster type: {e}") + return False + + def get_matching_replicasets(self, namespace: str, deployment_name: str) -> list[client.V1ReplicaSet]: + apps_v1 = self.apps_v1_api + rs_list = apps_v1.list_namespaced_replica_set(namespace) + matching_rs = [] + + for rs in rs_list.items: + owner_refs = rs.metadata.owner_references + if owner_refs: + for owner 
in owner_refs: + if owner.kind == "Deployment" and owner.name == deployment_name: + matching_rs.append(rs) + break + + return matching_rs + + def delete_replicaset(self, name: str, namespace: str): + body = client.V1DeleteOptions(propagation_policy="Foreground") + try: + self.apps_v1_api.delete_namespaced_replica_set( + name=name, + namespace=namespace, + body=body, + ) + local_logger.info(f"✅ Deleted ReplicaSet '{name}' in namespace '{namespace}'") + except client.exceptions.ApiException as e: + raise RuntimeError(f"Failed to delete ReplicaSet {name} in {namespace}: {e}") + + def apply_resource(self, manifest: dict): + + dyn_client = dynamic.DynamicClient(api_client.ApiClient()) + + gvk = { + ("v1", "ResourceQuota"): dyn_client.resources.get(api_version="v1", kind="ResourceQuota"), + # Add more mappings here if needed in the future + } + + key = (manifest["apiVersion"], manifest["kind"]) + if key not in gvk: + raise ValueError(f"Unsupported resource type: {key}") + + resource = gvk[key] + namespace = manifest["metadata"].get("namespace") + + try: + existing = resource.get(name=manifest["metadata"]["name"], namespace=namespace) + # If exists, patch it + resource.patch(body=manifest, name=manifest["metadata"]["name"], namespace=namespace) + local_logger.info(f"✅ Patched existing {manifest['kind']} '{manifest['metadata']['name']}'") + except dynamic.exceptions.NotFoundError: + resource.create(body=manifest, namespace=namespace) + local_logger.info(f"✅ Created new {manifest['kind']} '{manifest['metadata']['name']}'") + + def get_resource_quotas(self, namespace: str) -> list: + try: + response = self.core_v1_api.list_namespaced_resource_quota(namespace=namespace) + return response.items + except client.exceptions.ApiException as e: + raise RuntimeError(f"Failed to get resource quotas in namespace '{namespace}': {e}") + + def delete_resource_quota(self, name: str, namespace: str): + try: + self.core_v1_api.delete_namespaced_resource_quota( + name=name, namespace=namespace, body=client.V1DeleteOptions(propagation_policy="Foreground") + ) + local_logger.info(f"✅ Deleted resource quota '{name}' in namespace '{namespace}'") + except client.exceptions.ApiException as e: + raise RuntimeError(f"❌ Failed to delete resource quota '{name}' in namespace '{namespace}': {e}") + + def scale_deployment(self, name: str, namespace: str, replicas: int): + try: + body = {"spec": {"replicas": replicas}} + self.apps_v1_api.patch_namespaced_deployment(name=name, namespace=namespace, body=body) + local_logger.info(f"✅ Scaled deployment '{name}' in namespace '{namespace}' to {replicas} replicas.") + except client.exceptions.ApiException as e: + raise RuntimeError(f"❌ Failed to scale deployment '{name}' in namespace '{namespace}': {e}") + + def get_pod_cpu_usage(self, namespace: str): + cmd = f"kubectl top pod -n {namespace} --no-headers" + out = self.exec_command(cmd) + # make the result into a dict + result = {} + for line in out.split("\n"): + if line: + pod_name, cpu, _ = line.split(None, 2) + cpu = cpu.replace("m", "") + result[pod_name] = cpu + return result + + def trigger_rollout(self, deployment_name: str, namespace: str): + self.exec_command(f"kubectl rollout restart deployment {deployment_name} -n {namespace}") + + def trigger_scale(self, deployment_name: str, namespace: str, replicas: int): + self.exec_command(f"kubectl scale deployment {deployment_name} -n {namespace} --replicas={replicas}") + + +# Example usage: +if __name__ == "__main__": + kubectl = KubeCtl() + namespace = "social-network" + 
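+    # Illustrative values only: this example assumes a social-network deployment whose pods carry an
+    # app=<service-name> label (e.g., app=user-service), which is the selector used below.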
frontend_service = "nginx-thrift" + user_service = "user-service" + + user_service_pod = kubectl.get_pod_name(namespace, f"app={user_service}") + logs = kubectl.get_pod_logs(user_service_pod, namespace) + print(logs) diff --git a/sregym/service/metadata/__init__.py b/sregym/service/metadata/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/sregym/service/metadata/astronomy-shop.json b/sregym/service/metadata/astronomy-shop.json new file mode 100644 index 0000000..4f23292 --- /dev/null +++ b/sregym/service/metadata/astronomy-shop.json @@ -0,0 +1,22 @@ +{ + "Name": "OpenTelemetry Demo Astronomy Shop", + "Namespace": "astronomy-shop", + "Desc": "An online shopping platform built with a microservices architecture, showcasing OpenTelemetry instrumentation for distributed tracing across services.", + "Supported Operations": [ + "Add item to cart", + "View product catalog", + "Checkout with selected items", + "Generate shipping quotes", + "Receive product recommendations", + "Process payment", + "Register/Login using user credentials", + "Send order confirmation emails", + "Calculate order totals and apply discounts" + ], + "Helm Config": { + "release_name": "astronomy-shop", + "chart_path": "astronomy-shop/charts/opentelemetry-demo", + "namespace": "astronomy-shop", + "remote_chart": false + } +} \ No newline at end of file diff --git a/sregym/service/metadata/blueprint-hotel-reservation.json b/sregym/service/metadata/blueprint-hotel-reservation.json new file mode 100644 index 0000000..6a939c4 --- /dev/null +++ b/sregym/service/metadata/blueprint-hotel-reservation.json @@ -0,0 +1,13 @@ +{ + "Name": "Blueprint Hotel Reservation", + "Namespace": "blueprint-hotel-reservation", + "Desc": "A blueprint-compiled version of the hotel reservation application built with Go and gRPC, providing backend in-memory and persistent databases, a recommender system for hotel recommendations, and a functionality to place reservations.", + "Supported Operations": [ + "Get profile and rates of nearby hotels available during given time periods", + "Recommend hotels based on user provided metrics", + "Place reservations", + "Run a workload generator job to simulate user activity" + ], + "K8S Deploy Path": "BlueprintHotelReservation/kubernetes", + "K8S Workload Job Path": "BlueprintHotelReservation/wlgen" +} \ No newline at end of file diff --git a/sregym/service/metadata/fleet-cast.json b/sregym/service/metadata/fleet-cast.json new file mode 100644 index 0000000..d9d464f --- /dev/null +++ b/sregym/service/metadata/fleet-cast.json @@ -0,0 +1,13 @@ +{ + "Name": "Fleet Cast", + "Namespace": "fleetcast", + "Desc": "FleetCast is a TiDB based application for satellite orbital data", + "Supported Operations": [ + "Serves satellite data from TiDB." + ], + "Helm Config": { + "release_name": "fleetcast", + "chart_path": "FleetCast/satellite-app", + "namespace": "fleetcast" + } +} \ No newline at end of file diff --git a/sregym/service/metadata/flight-ticket.json b/sregym/service/metadata/flight-ticket.json new file mode 100644 index 0000000..46782f7 --- /dev/null +++ b/sregym/service/metadata/flight-ticket.json @@ -0,0 +1,13 @@ +{ + "Name": "Flight Ticket", + "Namespace": "openwhisk", + "Desc": "FlightTicket is a serverless application benchmark running on top of OpenWhisk on Kubernetes. The application is architectured based on the AirplaneTicket benchmark. Different from AirplaneTicket, FlightTicket is implemented in Python and runs as a serverless application on OpenWhisk. 
One key feature of FlightTicket is that, instead of using a synthetic input generator, FlightTicket uses the US-airlines dataset from the US Department of transport.", + "Supported Operations": [ + "Serves transportation data from redis." + ], + "Helm Config": { + "release_name": "flight-ticket", + "chart_path": "flight-ticket", + "namespace": "openwhisk" + } +} \ No newline at end of file diff --git a/sregym/service/metadata/hotel-reservation.json b/sregym/service/metadata/hotel-reservation.json new file mode 100644 index 0000000..2296ee6 --- /dev/null +++ b/sregym/service/metadata/hotel-reservation.json @@ -0,0 +1,16 @@ +{ + "Name": "Hotel Reservation", + "Namespace": "hotel-reservation", + "Desc": "A hotel reservation application built with Go and gRPC, providing backend in-memory and persistent databases, a recommender system for hotel recommendations, and a functionality to place reservations.", + "Supported Operations": [ + "Get profile and rates of nearby hotels available during given time periods", + "Recommend hotels based on user provided metrics", + "Place reservations" + ], + "Helm Config": { + "release_name": "hotel-reservation", + "chart_path": "hotelReservation/helm-chart/hotelreservation", + "namespace": "hotel-reservation" + }, + "K8S Deploy Path": "hotelReservation/kubernetes" +} \ No newline at end of file diff --git a/sregym/service/metadata/prometheus.json b/sregym/service/metadata/prometheus.json new file mode 100644 index 0000000..68cb7f7 --- /dev/null +++ b/sregym/service/metadata/prometheus.json @@ -0,0 +1,17 @@ +{ + "Name": "Prometheus", + "Namespace": "observe", + "Desc": "Prometheus is an open-source systems monitoring and alerting toolkit that collects and stores metrics as time series data.", + "Supported Operations": [ + "Collect metrics from various endpoints", + "Store time series data", + "Query metrics with PromQL", + "Generate alerts based on metrics" + ], + "Helm Config": { + "release_name": "prometheus", + "chart_path": "observer/prometheus/prometheus/", + "namespace": "observe" + }, + "PersistentVolumeClaimConfig": "observer/prometheus/prometheus-pvc.yml" +} diff --git a/sregym/service/metadata/social-network.json b/sregym/service/metadata/social-network.json new file mode 100644 index 0000000..5b3fa11 --- /dev/null +++ b/sregym/service/metadata/social-network.json @@ -0,0 +1,19 @@ +{ + "Name": "Social Network", + "Namespace": "social-network", + "Desc": "A social network with unidirectional follow relationships, implemented with loosely-coupled microservices, communicating with each other via Thrift RPCs.", + "Supported Operations": [ + "Create text post (optional media: image, video, shortened URL, user tag)", + "Read post", + "Read entire user timeline", + "Receive recommendations on which users to follow", + "Search database for user or post", + "Register/Login using user credentials", + "Follow/Unfollow user" + ], + "Helm Config": { + "release_name": "social-network", + "chart_path": "socialNetwork/helm-chart/socialnetwork", + "namespace": "social-network" + } +} \ No newline at end of file diff --git a/sregym/service/metadata/tidb-with-operator.json b/sregym/service/metadata/tidb-with-operator.json new file mode 100644 index 0000000..f074b90 --- /dev/null +++ b/sregym/service/metadata/tidb-with-operator.json @@ -0,0 +1,20 @@ +{ + "Name": "TiDB Cluster with Operator", + "Namespace": "tidb-cluster", + "OperatorNamespace": "tidb-admin", + "Desc": "A TiDB cluster with operator managing.", + "Supported Operations": [ + "TiDB related operation" + ], + 
"K8S Config": { + "config_url": "https://raw.githubusercontent.com/pingcap/tidb-operator/v1.6.0/examples/basic/tidb-cluster.yaml", + "namespace": "tidb-cluster" + }, + "Helm Operator Config": { + "CRD": "https://raw.githubusercontent.com/pingcap/tidb-operator/v1.6.0/manifests/crd.yaml", + "release_name": "tidb-operator", + "chart_path": "pingcap/tidb-operator", + "namespace": "tidb-admin", + "version": "v1.6.0" + } +} \ No newline at end of file diff --git a/sregym/service/metadata/tidb_metadata.json b/sregym/service/metadata/tidb_metadata.json new file mode 100644 index 0000000..0dc3c49 --- /dev/null +++ b/sregym/service/metadata/tidb_metadata.json @@ -0,0 +1,20 @@ +{ + "Name": "TiDB Cluster with Operator", + "Namespace": "tidb-cluster", + "OperatorNamespace": "tidb-operator", + "Desc": "A TiDB cluster with operator managing.", + "Supported Operations": [ + "TiDB related operation" + ], + "K8S Config": { + "config_url": "https://raw.githubusercontent.com/pingcap/tidb-operator/v1.6.0/examples/basic/tidb-cluster.yaml", + "namespace": "tidb-cluster" + }, + "Helm Operator Config": { + "CRD": "https://raw.githubusercontent.com/pingcap/tidb-operator/v1.6.3/manifests/crd.yaml", + "release_name": "tidb-operator", + "chart_path": "pingcap/tidb-operator", + "namespace": "tidb-operator", + "version": "v1.6.3" + } +} \ No newline at end of file diff --git a/sregym/service/metadata/train-ticket.json b/sregym/service/metadata/train-ticket.json new file mode 100644 index 0000000..9d94572 --- /dev/null +++ b/sregym/service/metadata/train-ticket.json @@ -0,0 +1,13 @@ +{ + "Name": "Train Ticket", + "Namespace": "train-ticket", + "Desc": "The project is a train ticket booking system based on microservice architecture which contains 41 microservices.", + "Supported Operations": [ + "" + ], + "Helm Config": { + "release_name": "train-ticket", + "chart_path": "train-ticket", + "namespace": "train-ticket" + } +} \ No newline at end of file diff --git a/sregym/service/shell.py b/sregym/service/shell.py new file mode 100644 index 0000000..d04ba4f --- /dev/null +++ b/sregym/service/shell.py @@ -0,0 +1,34 @@ +"""Interface to run shell commands in the service cluster.""" + +import subprocess + + +class Shell: + """Interface to run shell commands. 
Currently used for development/debugging with cli.py""" + + @staticmethod + def exec(command: str, input_data=None, cwd=None): + """Execute a shell command on localhost.""" + if input_data is not None: + input_data = input_data.encode("utf-8") + + try: + out = subprocess.run( + command, + input=input_data, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + shell=True, + cwd=cwd, + ) + + if out.stderr or out.returncode != 0: + error_message = out.stderr.decode("utf-8") + print(f"[ERROR] Command execution failed: {error_message}") + return error_message + else: + output_message = out.stdout.decode("utf-8") + return output_message + + except Exception as e: + raise RuntimeError(f"Failed to execute command: {command}\nError: {str(e)}") diff --git a/sregym/service/telemetry/__init__.py b/sregym/service/telemetry/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/sregym/service/telemetry/prometheus.py b/sregym/service/telemetry/prometheus.py new file mode 100644 index 0000000..f97c877 --- /dev/null +++ b/sregym/service/telemetry/prometheus.py @@ -0,0 +1,208 @@ +import json +import logging +import os +import socket +import subprocess +import threading +import time + +import yaml + +from sregym.paths import BASE_DIR, PROMETHEUS_METADATA +from sregym.service.helm import Helm +from sregym.service.kubectl import KubeCtl + + +class Prometheus: + def __init__(self): + self.config_file = PROMETHEUS_METADATA + self.name = None + self.namespace = None + self.helm_configs = {} + self.pvc_config_file = None + self.port = self.find_free_port() + self.port_forward_process = None + + self.local_logger = logging.getLogger("all.infra.prometheus") + self.local_logger.propagate = True + self.local_logger.setLevel(logging.DEBUG) + + self.load_service_json() + + def load_service_json(self): + """Load metric service metadata into attributes.""" + with open(self.config_file, "r") as file: + metadata = json.load(file) + + self.name = metadata.get("Name") + self.namespace = metadata.get("Namespace") + + self.helm_configs = metadata.get("Helm Config", {}) + + self.name = metadata["Name"] + self.namespace = metadata["Namespace"] + if "Helm Config" in metadata: + self.helm_configs = metadata["Helm Config"] + if "chart_path" in self.helm_configs: + chart_path = self.helm_configs["chart_path"] + self.helm_configs["chart_path"] = str(BASE_DIR / chart_path) + + self.pvc_config_file = os.path.join(BASE_DIR, metadata.get("PersistentVolumeClaimConfig")) + + def get_service_json(self) -> dict: + """Get metric service metadata in JSON format.""" + with open(self.config_file, "r") as file: + return json.load(file) + + def get_service_summary(self) -> str: + """Get a summary of the metric service metadata.""" + service_json = self.get_service_json() + service_name = service_json.get("Name", "") + namespace = service_json.get("Namespace", "") + desc = service_json.get("Desc", "") + supported_operations = service_json.get("Supported Operations", []) + operations_str = "\n".join([f" - {op}" for op in supported_operations]) + + return ( + f"Telemetry Service Name: {service_name}\n" + f"Namespace: {namespace}\n" + f"Description: {desc}\n" + f"Supported Operations:\n{operations_str}" + ) + + def deploy(self): + """Deploy the metric collector using Helm.""" + if self._is_prometheus_running(): + self.local_logger.warning("Prometheus is already running. 
Skipping redeployment.") + self.start_port_forward() + return + + self._delete_pvc() + Helm.uninstall(**self.helm_configs) + + if self.pvc_config_file: + pvc_name = self._get_pvc_name_from_file(self.pvc_config_file) + if not self._pvc_exists(pvc_name): + self._apply_pvc() + + Helm.install(**self.helm_configs) + Helm.assert_if_deployed(self.namespace) + self.start_port_forward() + + def teardown(self): + """Teardown the metric collector deployment.""" + Helm.uninstall(**self.helm_configs) + + if self.pvc_config_file: + self._delete_pvc() + self.stop_port_forward() + + def start_port_forward(self): + """Starts port-forwarding to access Prometheus.""" + self.local_logger.info("Start port-forwarding for Prometheus.") + if self.port_forward_process and self.port_forward_process.poll() is None: + self.local_logger.warning("Port-forwarding already active.") + return + + for attempt in range(3): + self.local_logger.debug(f"Attempt {attempt + 1} of 3 in starting port-forwarding.") + if self.is_port_in_use(self.port): + self.local_logger.debug( + f"Port {self.port} is already in use. Attempt {attempt + 1} of 3. Retrying in 3 seconds..." + ) + time.sleep(3) + continue + + command = f"kubectl port-forward svc/prometheus-server {self.port}:80 -n observe" + self.port_forward_process = subprocess.Popen( + command, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + os.environ["PROMETHEUS_PORT"] = str(self.port) + self.local_logger.debug(f"Set PROMETHEUS_PORT environment variable to {self.port}") + time.sleep(3) # Wait a bit for the port-forward to establish + + if self.port_forward_process.poll() is None: + self.local_logger.info(f"Port forwarding established at port {self.port}. PROMETHEUS_PORT set.") + os.environ["PROMETHEUS_PORT"] = str(self.port) + break + else: + self.local_logger.warning("Port forwarding failed. 
Retrying...") + else: + self.local_logger.warning("Failed to establish port forwarding after multiple attempts.") + + def stop_port_forward(self): + """Stops the kubectl port-forward command and cleans up resources.""" + if self.port_forward_process: + self.port_forward_process.terminate() + try: + self.port_forward_process.wait(timeout=5) + except subprocess.TimeoutExpired: + self.local_logger.warning("Port-forward process did not terminate in time, killing...") + self.port_forward_process.kill() + + if self.port_forward_process.stdout: + self.port_forward_process.stdout.close() + if self.port_forward_process.stderr: + self.port_forward_process.stderr.close() + + self.local_logger.info("Port forwarding for Prometheus stopped.") + + def is_port_in_use(self, port): + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + return s.connect_ex(("127.0.0.1", port)) == 0 + + def find_free_port(self, start=32000, end=32100): + for port in range(start, end): + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + if s.connect_ex(("127.0.0.1", port)) != 0: + return port + raise RuntimeError("No free ports available in the range.") + + def _apply_pvc(self): + """Apply the PersistentVolumeClaim configuration.""" + self.local_logger.info(f"Applying PersistentVolumeClaim from {self.pvc_config_file}") + KubeCtl().exec_command(f"kubectl apply -f {self.pvc_config_file} -n {self.namespace}") + + def _delete_pvc(self): + """Delete the PersistentVolume and associated PersistentVolumeClaim.""" + pvc_name = self._get_pvc_name_from_file(self.pvc_config_file) + result = KubeCtl().exec_command(f"kubectl get pvc {pvc_name} --ignore-not-found") + + if result: + self.local_logger.info(f"Deleting PersistentVolumeClaim {pvc_name}") + KubeCtl().exec_command(f"kubectl delete pvc {pvc_name}") + self.local_logger.info(f"Successfully deleted PersistentVolumeClaim from {pvc_name}") + else: + self.local_logger.warning(f"PersistentVolumeClaim {pvc_name} not found. Skipping deletion.") + + def _get_pvc_name_from_file(self, pv_config_file): + """Extract PVC name from the configuration file.""" + with open(pv_config_file, "r") as file: + pv_config = yaml.safe_load(file) + return pv_config["metadata"]["name"] + + def _pvc_exists(self, pvc_name: str) -> bool: + """Check if the PersistentVolumeClaim exists.""" + command = f"kubectl get pvc {pvc_name}" + try: + result = KubeCtl().exec_command(command) + if "No resources found" in result or "Error" in result: + return False + except subprocess.CalledProcessError as e: + return False + return True + + def _is_prometheus_running(self) -> bool: + """Check if Prometheus is already running in the cluster.""" + command = f"kubectl get pods -n {self.namespace} -l app.kubernetes.io/name=prometheus" + try: + result = KubeCtl().exec_command(command) + if "Running" in result: + return True + except subprocess.CalledProcessError: + return False + return False diff --git a/sregym/utils/__init__.py b/sregym/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/sregym/utils/actions.py b/sregym/utils/actions.py new file mode 100644 index 0000000..c208637 --- /dev/null +++ b/sregym/utils/actions.py @@ -0,0 +1,78 @@ +import importlib + + +def action(method): + """ + Decorator to mark a method as an action. + + Args: + method (function): The method to mark as an action. + + Returns: + function: The decorated method. + """ + method.is_action = True + return method + + +def read(method): + """ + Decorator to mark a method as a read action. 
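Read actions are intended to observe state without modifying it; state-changing actions should use the `write` decorator below.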
+ + Args: + method (function): The method to mark as a read action. + + Returns: + function: The decorated method. + """ + method.is_action = True + method.action_type = "read" + return method + + +def write(method): + """ + Decorator to mark a method as a write action. + + Args: + method (function): The method to mark as a write action. + + Returns: + function: The decorated method. + """ + method.is_action = True + method.action_type = "write" + return method + + +def get_actions(task: str, subtype: str | None = None) -> dict: + """ + Get all actions for the given task. + key: action name + value: docstring of the action + + Args: + task (str): The name of the task. + subtype (str): The subtype of the action (optional) (default: None). + + Returns: + dict: A dictionary of actions for the given task. + """ + class_name = task.title() + "Actions" + module = importlib.import_module("sregym.conductor.actions." + task) + class_obj = getattr(module, class_name) + + actions = { + method: getattr(class_obj, method).__doc__.strip() + for method in dir(class_obj) + if callable(getattr(class_obj, method)) and getattr(getattr(class_obj, method), "is_action", False) + } + + if subtype: + actions = { + method: doc + for method, doc in actions.items() + if getattr(getattr(class_obj, method), "action_type", None) == subtype + } + + return actions diff --git a/sregym/utils/cache.py b/sregym/utils/cache.py new file mode 100644 index 0000000..d916e4f --- /dev/null +++ b/sregym/utils/cache.py @@ -0,0 +1,36 @@ +import json +import os + +from sregym.paths import CACHE_DIR, LLM_CACHE_FILE + + +class LLMCache: + """A cache for storing the outputs of an LLM.""" + + def __init__(self) -> None: + if os.path.exists(LLM_CACHE_FILE): + with open(LLM_CACHE_FILE) as f: + self.cache_dict = json.load(f) + else: + os.makedirs(CACHE_DIR, exist_ok=True) + self.cache_dict = {} + + @staticmethod + def process_payload(payload): + if isinstance(payload, (list, dict)): + return json.dumps(payload) + return payload + + def get_from_cache(self, payload): + payload_cache = self.process_payload(payload) + if payload_cache in self.cache_dict: + return self.cache_dict[payload_cache] + return None + + def add_to_cache(self, payload, output): + payload_cache = self.process_payload(payload) + self.cache_dict[payload_cache] = output + + def save_cache(self): + with open(LLM_CACHE_FILE, "w") as f: + json.dump(self.cache_dict, f, indent=4) diff --git a/sregym/utils/decorators.py b/sregym/utils/decorators.py new file mode 100644 index 0000000..dfb46d5 --- /dev/null +++ b/sregym/utils/decorators.py @@ -0,0 +1,16 @@ +def mark_fault_injected(method): + def wrapper(self, *args, **kwargs): + try: + result = method(self, *args, **kwargs) + except Exception as e: + if method.__name__ == "inject_fault": + # We exit if there's an error during fault injection, warning if in recovery + raise + else: + print(f"[{method.__name__}] Warning: encountered error: {e!r}") + result = None + + self.fault_injected = method.__name__ == "inject_fault" + return result + + return wrapper diff --git a/tests/e2e-testing-scripts/auto_submit.py b/tests/e2e-testing-scripts/auto_submit.py new file mode 100644 index 0000000..3def43d --- /dev/null +++ b/tests/e2e-testing-scripts/auto_submit.py @@ -0,0 +1,23 @@ +import subprocess +import threading +from time import sleep + + +def automatic_submit(): + ctr = 0 + while ctr < 10000: + subprocess.run( + [ + "bash", + "-c", + 'curl -v http://localhost:8000/submit -H "Content-Type: application/json" -d \'{"solution":"yes"}\'', + ], 
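+            # Posts a fixed {"solution": "yes"} payload to the local submission endpoint every 30 seconds
+            # (up to 10000 iterations), presumably to exercise the e2e submission flow during testing.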
+ stdin=subprocess.DEVNULL, + ) + sleep(30) + ctr += 1 + + +if __name__ == "__main__": + thread = threading.Thread(target=automatic_submit) + thread.start() diff --git a/tests/e2e-testing-scripts/automating_tests.py b/tests/e2e-testing-scripts/automating_tests.py new file mode 100644 index 0000000..4cf5ded --- /dev/null +++ b/tests/e2e-testing-scripts/automating_tests.py @@ -0,0 +1,549 @@ +import json +import os +import re +import shlex +import subprocess +import sys +import tempfile +from datetime import date +from pathlib import Path +from time import sleep + +# we added the ssh key to the ssh agent such that all of all the keys are carried with the ssh connection. +base = Path(__file__).resolve().parent + +ENV = { + **os.environ, + "CI": "1", + "NONINTERACTIVE": "1", + "DEBIAN_FRONTEND": "noninteractive", + "SUDO_ASKPASS": "/bin/false", +} +TIMEOUT = 1800 + + +scripts = [ + "brew.sh", + "go.sh", + "docker.sh", + "kind.sh", +] + + +def init_user_paths(user: str): + """Initialize all global paths and user-dependent commands after username is known.""" + global SREGYM_DIR, SREGYM_ROOT, KIND_DIR, REMOTE_ENV, LOCAL_ENV, REMOTE_SELF_PATH + SREGYM_DIR = Path(f"/users/{user}/SREGym").resolve() + SREGYM_ROOT = SREGYM_DIR + KIND_DIR = SREGYM_ROOT / "kind" + REMOTE_ENV = f"/users/{user}/SREGym/.env" + LOCAL_ENV = Path(__file__).resolve().parent.parent.parent / ".env" + REMOTE_SELF_PATH = f"/users/{user}/e2e-testing-scripts/automating_tests.py" + + +def _read_nodes(path: str = "nodes.txt") -> list[str]: + full_path = (base / path).resolve() + if not full_path.exists(): + raise FileNotFoundError(f"nodes.txt not found at {full_path}") + with open(full_path) as f: + return [ln.strip() for ln in f if ln.strip()] + + +def _run(cmd: list[str]): + print("$", " ".join(shlex.quote(x) for x in cmd)) + subprocess.run(cmd) + + +def scp_scripts_to_all(user, nodes_file: str = "nodes.txt"): + """scp -r LOCAL_COPY_SRC -> ~/e2e-testing-scripts on each node.""" + + if not Path(base).exists(): + raise FileNotFoundError(f"LOCAL_COPY_SRC not found: {base}") + for host in _read_nodes(nodes_file): + _run(["scp", "-r", "-o", "StrictHostKeyChecking=no", str(base), f"{host}:~"]) + + +def run_installations_all(user, nodes_file: str = "nodes.txt"): + """SSH each node and run this file with --installations in a tmux session named 'installations'.""" + tmux_cmd = ( + f"if tmux has-session -t installations; then tmux kill-session -t installations; fi; " + f"tmux new-session -d -s installations " + f"'bash -ic \"python3 {REMOTE_SELF_PATH} --installations; sleep infinity\"'" + ) + for host in _read_nodes(nodes_file): + _run(["ssh", host, tmux_cmd]) + + +def run_setup_env_all(user, nodes_file: str = "nodes.txt"): + """SSH each node and run this file with --setup-env in a detached tmux session.""" + for host in _read_nodes(nodes_file): + print(f"\n=== [SSH setup-env] {host} ===") + + remote_tmux = ( + "tmux kill-session -t setup_env 2>/dev/null || true; " + "tmux new-session -d -s setup_env " + "'bash -ic \"" + "cd ~/e2e-testing-scripts && " + "python3 automating_tests.py --setup-env 2>&1 | tee -a ~/setup_env_log.txt; " + "sleep infinity\"'" + ) + + _run(["ssh", host, remote_tmux]) + print(f"Started tmux session 'setup_env' on {host} (log: ~/setup_env_log.txt)") + + +def run_shell_script(path: Path): + """Run a shell script with Bash: ensure exec bit, then 'bash